flonat-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/domain-reviewer.md +336 -0
- package/.claude/agents/fixer.md +226 -0
- package/.claude/agents/paper-critic.md +370 -0
- package/.claude/agents/peer-reviewer.md +289 -0
- package/.claude/agents/proposal-reviewer.md +215 -0
- package/.claude/agents/referee2-reviewer.md +367 -0
- package/.claude/agents/references/journal-referee-profiles.md +354 -0
- package/.claude/agents/references/paper-critic/council-personas.md +77 -0
- package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
- package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
- package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
- package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
- package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
- package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
- package/.claude/agents/references/referee-config.md +114 -0
- package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
- package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
- package/.claude/rules/design-before-results.md +52 -0
- package/.claude/rules/ignore-agents-md.md +17 -0
- package/.claude/rules/ignore-gemini-md.md +17 -0
- package/.claude/rules/lean-claude-md.md +45 -0
- package/.claude/rules/learn-tags.md +99 -0
- package/.claude/rules/overleaf-separation.md +67 -0
- package/.claude/rules/plan-first.md +175 -0
- package/.claude/rules/read-docs-first.md +50 -0
- package/.claude/rules/scope-discipline.md +28 -0
- package/.claude/settings.json +125 -0
- package/.context/current-focus.md +33 -0
- package/.context/preferences/priorities.md +36 -0
- package/.context/preferences/task-naming.md +28 -0
- package/.context/profile.md +29 -0
- package/.context/projects/_index.md +41 -0
- package/.context/projects/papers/nudge-exp.md +22 -0
- package/.context/projects/papers/uncertainty.md +31 -0
- package/.context/resources/claude-scientific-writer-review.md +48 -0
- package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
- package/.context/resources/cunningham-multilang-code-audit.md +62 -0
- package/.context/resources/google-ai-co-scientist-review.md +72 -0
- package/.context/resources/karpathy-llm-council-review.md +58 -0
- package/.context/resources/multi-coder-reliability-protocol.md +175 -0
- package/.context/resources/pedro-santanna-takeaways.md +96 -0
- package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
- package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
- package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
- package/.context/resources/venue-rankings/core_2026.csv +801 -0
- package/.context/resources/venue-rankings.md +147 -0
- package/.context/workflows/README.md +69 -0
- package/.context/workflows/daily-review.md +91 -0
- package/.context/workflows/meeting-actions.md +108 -0
- package/.context/workflows/replication-protocol.md +155 -0
- package/.context/workflows/weekly-review.md +113 -0
- package/.mcp-server-biblio/formatters.py +158 -0
- package/.mcp-server-biblio/pyproject.toml +11 -0
- package/.mcp-server-biblio/server.py +678 -0
- package/.mcp-server-biblio/sources/__init__.py +14 -0
- package/.mcp-server-biblio/sources/base.py +73 -0
- package/.mcp-server-biblio/sources/formatters.py +83 -0
- package/.mcp-server-biblio/sources/models.py +22 -0
- package/.mcp-server-biblio/sources/multi_source.py +243 -0
- package/.mcp-server-biblio/sources/openalex_source.py +183 -0
- package/.mcp-server-biblio/sources/scopus_source.py +309 -0
- package/.mcp-server-biblio/sources/wos_source.py +508 -0
- package/.mcp-server-biblio/uv.lock +896 -0
- package/.scripts/README.md +161 -0
- package/.scripts/ai_pattern_density.py +446 -0
- package/.scripts/conf +445 -0
- package/.scripts/config.py +122 -0
- package/.scripts/count_inventory.py +275 -0
- package/.scripts/daily_digest.py +288 -0
- package/.scripts/done +177 -0
- package/.scripts/extract_meeting_actions.py +223 -0
- package/.scripts/focus +176 -0
- package/.scripts/generate-codex-agents-md.py +217 -0
- package/.scripts/inbox +194 -0
- package/.scripts/notion_helpers.py +325 -0
- package/.scripts/openalex/query_helpers.py +306 -0
- package/.scripts/papers +227 -0
- package/.scripts/query +223 -0
- package/.scripts/session-history.py +201 -0
- package/.scripts/skill-health.py +516 -0
- package/.scripts/skill-log-miner.py +273 -0
- package/.scripts/sync-to-codex.sh +252 -0
- package/.scripts/task +213 -0
- package/.scripts/tasks +190 -0
- package/.scripts/week +206 -0
- package/CLAUDE.md +197 -0
- package/LICENSE +21 -0
- package/MEMORY.md +38 -0
- package/README.md +269 -0
- package/docs/agents.md +44 -0
- package/docs/bibliography-setup.md +55 -0
- package/docs/council-mode.md +36 -0
- package/docs/getting-started.md +245 -0
- package/docs/hooks.md +38 -0
- package/docs/mcp-servers.md +82 -0
- package/docs/notion-setup.md +109 -0
- package/docs/rules.md +33 -0
- package/docs/scripts.md +303 -0
- package/docs/setup-overview/setup-overview.pdf +0 -0
- package/docs/skills.md +70 -0
- package/docs/system.md +159 -0
- package/hooks/block-destructive-git.sh +66 -0
- package/hooks/context-monitor.py +114 -0
- package/hooks/postcompact-restore.py +157 -0
- package/hooks/precompact-autosave.py +181 -0
- package/hooks/promise-checker.sh +124 -0
- package/hooks/protect-source-files.sh +81 -0
- package/hooks/resume-context-loader.sh +53 -0
- package/hooks/startup-context-loader.sh +102 -0
- package/package.json +51 -0
- package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/cli-council/.github/workflows/claude.yml +50 -0
- package/packages/cli-council/README.md +100 -0
- package/packages/cli-council/pyproject.toml +43 -0
- package/packages/cli-council/src/cli_council/__init__.py +19 -0
- package/packages/cli-council/src/cli_council/__main__.py +185 -0
- package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
- package/packages/cli-council/src/cli_council/backends/base.py +81 -0
- package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
- package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
- package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
- package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
- package/packages/cli-council/src/cli_council/config.py +51 -0
- package/packages/cli-council/src/cli_council/council.py +391 -0
- package/packages/cli-council/src/cli_council/models.py +46 -0
- package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/llm-council/.github/workflows/claude.yml +50 -0
- package/packages/llm-council/README.md +453 -0
- package/packages/llm-council/pyproject.toml +42 -0
- package/packages/llm-council/src/llm_council/__init__.py +23 -0
- package/packages/llm-council/src/llm_council/__main__.py +259 -0
- package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
- package/packages/llm-council/src/llm_council/client.py +253 -0
- package/packages/llm-council/src/llm_council/config.py +232 -0
- package/packages/llm-council/src/llm_council/council.py +482 -0
- package/packages/llm-council/src/llm_council/models.py +46 -0
- package/packages/mcp-bibliography/MEMORY.md +31 -0
- package/packages/mcp-bibliography/_app.py +226 -0
- package/packages/mcp-bibliography/formatters.py +158 -0
- package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
- package/packages/mcp-bibliography/pyproject.toml +15 -0
- package/packages/mcp-bibliography/run.sh +20 -0
- package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
- package/packages/mcp-bibliography/server.py +1857 -0
- package/packages/mcp-bibliography/tools/__init__.py +28 -0
- package/packages/mcp-bibliography/tools/_registry.py +19 -0
- package/packages/mcp-bibliography/tools/altmetric.py +107 -0
- package/packages/mcp-bibliography/tools/core.py +92 -0
- package/packages/mcp-bibliography/tools/dblp.py +52 -0
- package/packages/mcp-bibliography/tools/openalex.py +296 -0
- package/packages/mcp-bibliography/tools/opencitations.py +102 -0
- package/packages/mcp-bibliography/tools/openreview.py +179 -0
- package/packages/mcp-bibliography/tools/orcid.py +131 -0
- package/packages/mcp-bibliography/tools/scholarly.py +575 -0
- package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
- package/packages/mcp-bibliography/tools/zenodo.py +123 -0
- package/packages/mcp-bibliography/uv.lock +711 -0
- package/scripts/setup.sh +143 -0
- package/skills/beamer-deck/SKILL.md +199 -0
- package/skills/beamer-deck/references/quality-rubric.md +54 -0
- package/skills/beamer-deck/references/review-prompts.md +106 -0
- package/skills/bib-validate/SKILL.md +261 -0
- package/skills/bib-validate/references/council-mode.md +34 -0
- package/skills/bib-validate/references/deep-verify.md +79 -0
- package/skills/bib-validate/references/fix-mode.md +36 -0
- package/skills/bib-validate/references/openalex-verification.md +45 -0
- package/skills/bib-validate/references/preprint-check.md +31 -0
- package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
- package/skills/bib-validate/references/report-template.md +82 -0
- package/skills/code-archaeology/SKILL.md +141 -0
- package/skills/code-review/SKILL.md +265 -0
- package/skills/code-review/references/quality-rubric.md +67 -0
- package/skills/consolidate-memory/SKILL.md +208 -0
- package/skills/context-status/SKILL.md +126 -0
- package/skills/creation-guard/SKILL.md +230 -0
- package/skills/devils-advocate/SKILL.md +130 -0
- package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
- package/skills/init-project/SKILL.md +115 -0
- package/skills/init-project-course/references/memory-and-settings.md +92 -0
- package/skills/init-project-course/references/organise-templates.md +94 -0
- package/skills/init-project-course/skill.md +147 -0
- package/skills/init-project-light/skill.md +139 -0
- package/skills/init-project-research/SKILL.md +368 -0
- package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
- package/skills/init-project-research/references/atlas-schema.md +81 -0
- package/skills/init-project-research/references/confirmation-report.md +39 -0
- package/skills/init-project-research/references/domain-profile-template.md +104 -0
- package/skills/init-project-research/references/interview-round3.md +34 -0
- package/skills/init-project-research/references/literature-discovery.md +43 -0
- package/skills/init-project-research/references/scaffold-details.md +197 -0
- package/skills/init-project-research/templates/field-calibration.md +60 -0
- package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
- package/skills/init-project-research/templates/run-all.sh +116 -0
- package/skills/init-project-research/templates/seed-files.md +337 -0
- package/skills/insights-deck/SKILL.md +151 -0
- package/skills/interview-me/SKILL.md +157 -0
- package/skills/latex/SKILL.md +141 -0
- package/skills/latex/references/latex-configs.md +183 -0
- package/skills/latex-autofix/SKILL.md +230 -0
- package/skills/latex-autofix/references/known-errors.md +183 -0
- package/skills/latex-autofix/references/quality-rubric.md +50 -0
- package/skills/latex-health-check/SKILL.md +161 -0
- package/skills/learn/SKILL.md +220 -0
- package/skills/learn/scripts/validate_skill.py +265 -0
- package/skills/lessons-learned/SKILL.md +201 -0
- package/skills/literature/SKILL.md +335 -0
- package/skills/literature/references/agent-templates.md +393 -0
- package/skills/literature/references/bibliometric-apis.md +44 -0
- package/skills/literature/references/cli-council-search.md +79 -0
- package/skills/literature/references/openalex-api-guide.md +371 -0
- package/skills/literature/references/openalex-common-queries.md +381 -0
- package/skills/literature/references/openalex-workflows.md +248 -0
- package/skills/literature/references/reference-manager-sync.md +36 -0
- package/skills/literature/references/scopus-api-guide.md +208 -0
- package/skills/literature/references/wos-api-guide.md +308 -0
- package/skills/multi-perspective/SKILL.md +311 -0
- package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
- package/skills/pipeline-manifest/SKILL.md +226 -0
- package/skills/pre-submission-report/SKILL.md +153 -0
- package/skills/process-reviews/SKILL.md +244 -0
- package/skills/process-reviews/references/rr-routing.md +101 -0
- package/skills/project-deck/SKILL.md +87 -0
- package/skills/project-safety/SKILL.md +135 -0
- package/skills/proofread/SKILL.md +254 -0
- package/skills/proofread/references/quality-rubric.md +104 -0
- package/skills/python-env/SKILL.md +57 -0
- package/skills/quarto-deck/SKILL.md +226 -0
- package/skills/quarto-deck/references/markdown-format.md +143 -0
- package/skills/quarto-deck/references/quality-rubric.md +54 -0
- package/skills/save-context/SKILL.md +174 -0
- package/skills/session-log/SKILL.md +98 -0
- package/skills/shared/concept-validation-gate.md +161 -0
- package/skills/shared/council-protocol.md +265 -0
- package/skills/shared/distribution-diagnostics.md +164 -0
- package/skills/shared/engagement-stratified-sampling.md +218 -0
- package/skills/shared/escalation-protocol.md +74 -0
- package/skills/shared/external-audit-protocol.md +205 -0
- package/skills/shared/intercoder-reliability.md +256 -0
- package/skills/shared/mcp-degradation.md +81 -0
- package/skills/shared/method-probing-questions.md +163 -0
- package/skills/shared/multi-language-conventions.md +143 -0
- package/skills/shared/paid-api-safety.md +174 -0
- package/skills/shared/palettes.md +90 -0
- package/skills/shared/progressive-disclosure.md +92 -0
- package/skills/shared/project-documentation-content.md +443 -0
- package/skills/shared/project-documentation-format.md +281 -0
- package/skills/shared/project-documentation.md +100 -0
- package/skills/shared/publication-output.md +138 -0
- package/skills/shared/quality-scoring.md +70 -0
- package/skills/shared/reference-resolution.md +77 -0
- package/skills/shared/research-quality-rubric.md +165 -0
- package/skills/shared/rhetoric-principles.md +54 -0
- package/skills/shared/skill-design-patterns.md +272 -0
- package/skills/shared/skill-index.md +240 -0
- package/skills/shared/system-documentation.md +334 -0
- package/skills/shared/tikz-rules.md +402 -0
- package/skills/shared/validation-tiers.md +121 -0
- package/skills/shared/venue-guides/README.md +46 -0
- package/skills/shared/venue-guides/cell_press_style.md +483 -0
- package/skills/shared/venue-guides/conferences_formatting.md +564 -0
- package/skills/shared/venue-guides/cs_conference_style.md +463 -0
- package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
- package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
- package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
- package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
- package/skills/shared/venue-guides/journals_formatting.md +486 -0
- package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
- package/skills/shared/venue-guides/ml_conference_style.md +556 -0
- package/skills/shared/venue-guides/nature_science_style.md +405 -0
- package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
- package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
- package/skills/split-pdf/SKILL.md +172 -0
- package/skills/split-pdf/methodology.md +48 -0
- package/skills/sync-notion/SKILL.md +93 -0
- package/skills/system-audit/SKILL.md +157 -0
- package/skills/system-audit/references/sub-agent-prompts.md +294 -0
- package/skills/task-management/SKILL.md +131 -0
- package/skills/update-focus/SKILL.md +204 -0
- package/skills/update-project-doc/SKILL.md +194 -0
- package/skills/validate-bib/SKILL.md +242 -0
- package/skills/validate-bib/references/council-mode.md +34 -0
- package/skills/validate-bib/references/deep-verify.md +71 -0
- package/skills/validate-bib/references/openalex-verification.md +45 -0
- package/skills/validate-bib/references/preprint-check.md +31 -0
- package/skills/validate-bib/references/report-template.md +62 -0
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
"""3-stage council orchestration via local CLI tools.
|
|
2
|
+
|
|
3
|
+
Stage 1 — Independent assessments (parallel subprocess calls)
|
|
4
|
+
Stage 2 — Anonymised peer review (each backend reviews all assessments)
|
|
5
|
+
Stage 3 — Chairman synthesis (single backend reads everything, produces final)
|
|
6
|
+
|
|
7
|
+
Supports checkpoint-based session resumption (inspired by Owlex) and
|
|
8
|
+
atomic file-based state (inspired by agents-council).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import logging
|
|
15
|
+
import re
|
|
16
|
+
from collections import defaultdict
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from time import perf_counter
|
|
19
|
+
|
|
20
|
+
from cli_council.backends.base import CLIBackend
|
|
21
|
+
from cli_council.checkpoint import CouncilCheckpointer
|
|
22
|
+
from cli_council.config import DEFAULT_CHAIRMAN, DEFAULT_COUNCIL_BACKENDS, STAGE_TIMEOUT
|
|
23
|
+
from cli_council.models import Assessment, CouncilMeta, CouncilResult, PeerReview
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _get_backend(name: str) -> CLIBackend:
    """Create a new backend instance for the given backend name.

    Raises ``ValueError`` listing the known names when ``name`` is not one
    of the registered backends.
    """
    # Backend classes are imported inside the function, not at module level.
    from cli_council.backends.claude import ClaudeBackend
    from cli_council.backends.codex import CodexBackend
    from cli_council.backends.gemini import GeminiBackend

    known_backends: dict[str, type[CLIBackend]] = {
        "gemini": GeminiBackend,
        "codex": CodexBackend,
        "claude": ClaudeBackend,
    }
    if name not in known_backends:
        raise ValueError(f"Unknown backend: {name!r}. Available: {list(known_backends)}")
    backend_cls = known_backends[name]
    return backend_cls()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class CouncilRunner:
|
|
46
|
+
"""Orchestrates a multi-model council using local CLI tools."""
|
|
47
|
+
|
|
48
|
+
def __init__(
    self,
    backends: list[str] | None = None,
    chairman: str | None = None,
    timeout: int = STAGE_TIMEOUT,
    cwd: str | None = None,
    checkpoint_dir: str | Path | None = None,
) -> None:
    """Configure the council and instantiate every available backend.

    Parameters
    ----------
    backends:
        Names of the council backends; falls back to
        ``DEFAULT_COUNCIL_BACKENDS`` when omitted or empty.
    chairman:
        Name of the backend that writes the Stage 3 synthesis; falls back
        to ``DEFAULT_CHAIRMAN`` when omitted or empty.
    timeout:
        Timeout passed to each backend call (Stage 3 uses twice this value).
    cwd:
        Working directory forwarded to each backend call.
    checkpoint_dir:
        Directory for checkpoints; checkpointing is disabled when falsy.

    Raises
    ------
    ValueError
        If a backend or chairman name is not known to ``_get_backend``.
    """
    # Copy so that this instance never aliases the caller's list or the
    # module-level default list.
    self.backend_names = list(backends or DEFAULT_COUNCIL_BACKENDS)
    self.chairman_name = chairman or DEFAULT_CHAIRMAN
    self.timeout = timeout
    self.cwd = cwd
    self._checkpoint_dir = Path(checkpoint_dir) if checkpoint_dir else None

    # Instantiate and validate backends
    self.backends: dict[str, CLIBackend] = {}
    for name in self.backend_names:
        backend = _get_backend(name)
        if not backend.is_available():
            logger.warning("Backend %s not found on PATH — skipping", name)
            continue
        self.backends[name] = backend

    # Make sure the chairman can be looked up in self.backends at Stage 3.
    # NOTE(review): because Stages 1 and 2 iterate over self.backends, a
    # chairman added here also takes part in those stages — confirm this
    # is intended.
    if self.chairman_name not in self.backends:
        chairman_backend = _get_backend(self.chairman_name)
        if chairman_backend.is_available():
            self.backends[self.chairman_name] = chairman_backend
        else:
            # Previously silent: the problem only surfaced at Stage 3,
            # after every other stage had already run.
            logger.warning(
                "Chairman backend %s not found on PATH — "
                "synthesis will fall back to the first assessment",
                self.chairman_name,
            )
|
|
75
|
+
|
|
76
|
+
async def run(
    self,
    prompt: str,
    *,
    system_context: str = "",
    resume: bool = False,
) -> CouncilResult:
    """Run the full 3-stage council process.

    Parameters
    ----------
    prompt:
        The main question or task for the council.
    system_context:
        Optional context (project description, file contents,
        constraints, etc.). It is prepended to the Stage 1 prompt;
        Stages 2 and 3 receive the bare ``prompt`` as the original
        question.
    resume:
        If True and checkpoint_dir was provided, resume from the last
        completed stage of the latest run instead of starting fresh.

    Returns
    -------
    CouncilResult
        Synthesis, assessments, peer reviews and timing/error metadata.
        When fewer than two assessments are available the run stops
        after Stage 1 and the synthesis is the only assessment's text
        (or an empty string when there is none).
    """
    errors: list[str] = []
    t_total = perf_counter()

    full_prompt = f"{system_context}\n\n{prompt}".strip() if system_context else prompt

    # Set up checkpointing. Without a checkpoint directory ckpt stays None.
    ckpt = None
    resume_from = 0
    if self._checkpoint_dir:
        if resume:
            # Find the latest run and resume from it
            probe = CouncilCheckpointer(self._checkpoint_dir)
            latest_run = probe.find_latest_run()
            if latest_run:
                candidate = CouncilCheckpointer(self._checkpoint_dir, run_id=latest_run)
                resume_from = candidate.last_completed_stage()
                if resume_from > 0:
                    ckpt = candidate
                    logger.info(
                        "Resuming run %s from stage %d",
                        latest_run, resume_from + 1,
                    )
        if ckpt is None:
            # Not resuming, nothing to resume, or the latest run has no
            # completed stage: start a fresh run.
            ckpt = CouncilCheckpointer(self._checkpoint_dir)

    # Stage 1: Independent assessments
    stage1_ms = 0
    assessments: list[Assessment] | None = None
    if resume_from >= 1 and ckpt:
        # Resume: load from checkpoint
        saved = ckpt.load_stage1()
        if saved:
            assessments = [Assessment(**a) for a in saved]
            logger.info("Stage 1: loaded %d assessments from checkpoint", len(assessments))
    # True when Stage 1 actually ran in this call (fresh run, or the
    # checkpoint had nothing to load).
    stage1_fresh = assessments is None
    if stage1_fresh:
        t1 = perf_counter()
        assessments = await self._stage1(full_prompt, errors)
        stage1_ms = int((perf_counter() - t1) * 1000)

    if len(assessments) < 2:
        errors.append(f"Only {len(assessments)} backend(s) responded — need at least 2 for peer review")
        return CouncilResult(
            synthesis=assessments[0].text if assessments else "",
            assessments=assessments,
            peer_reviews=[],
            meta=CouncilMeta(
                backends_used=[a.backend for a in assessments],
                stage1_ms=stage1_ms,
                total_ms=int((perf_counter() - t_total) * 1000),
                chairman_backend=self.chairman_name,
                errors=errors,
            ),
        )

    # Assign anonymised labels
    for i, a in enumerate(assessments):
        a.label = f"Assessment {chr(65 + i)}"

    # Checkpoint Stage 1 whenever it was computed in this call. Gating on
    # resume_from alone would skip the save after a failed checkpoint load.
    if ckpt and stage1_fresh:
        ckpt.save_stage1(
            [a.model_dump() for a in assessments],
            [a.backend for a in assessments],
        )
        pending = ckpt.pending_participants(
            self.backend_names,
            [a.backend for a in assessments],
        )
        if pending:
            logger.warning("Stage 1: pending backends: %s", pending)

    # Stage 2: Peer review
    stage2_ms = 0
    peer_reviews: list[PeerReview] | None = None
    # Saved reviews refer to the saved assessments and their labels, so
    # they are only reused when Stage 1 was itself loaded from checkpoint.
    if resume_from >= 2 and ckpt and not stage1_fresh:
        saved = ckpt.load_stage2()
        if saved:
            reviews_data, _ = saved
            peer_reviews = [PeerReview(**r) for r in reviews_data]
            logger.info("Stage 2: loaded %d reviews from checkpoint", len(peer_reviews))
    stage2_fresh = peer_reviews is None
    if stage2_fresh:
        t2 = perf_counter()
        peer_reviews = await self._stage2(prompt, assessments, errors)
        stage2_ms = int((perf_counter() - t2) * 1000)

    # Checkpoint Stage 2 whenever it was computed in this call.
    if ckpt and stage2_fresh:
        ckpt.save_stage2(
            [r.model_dump() for r in peer_reviews],
            [r.backend for r in peer_reviews],
        )

    # Stage 3: Chairman synthesis (always executed, even when resuming)
    t3 = perf_counter()
    synthesis = await self._stage3(prompt, assessments, peer_reviews, errors)
    stage3_ms = int((perf_counter() - t3) * 1000)

    # Checkpoint Stage 3
    if ckpt:
        ckpt.save_stage3(synthesis, self.chairman_name)

    total_ms = int((perf_counter() - t_total) * 1000)

    return CouncilResult(
        synthesis=synthesis,
        assessments=assessments,
        peer_reviews=peer_reviews,
        meta=CouncilMeta(
            backends_used=[a.backend for a in assessments],
            stage1_ms=stage1_ms,
            stage2_ms=stage2_ms,
            stage3_ms=stage3_ms,
            total_ms=total_ms,
            chairman_backend=self.chairman_name,
            errors=errors,
        ),
    )
|
|
222
|
+
|
|
223
|
+
# ------------------------------------------------------------------
|
|
224
|
+
# Stage 1: Independent Assessments
|
|
225
|
+
# ------------------------------------------------------------------
|
|
226
|
+
|
|
227
|
+
async def _stage1(
    self, prompt: str, errors: list[str],
) -> list[Assessment]:
    """Send ``prompt`` to every backend concurrently and collect the answers.

    A backend that raises is logged, recorded in ``errors`` and left out of
    the returned list; the other backends are unaffected.
    """

    async def _ask(name: str, backend: CLIBackend) -> Assessment | None:
        # Building the Assessment stays inside the try so that any failure
        # while constructing it is reported the same way as a backend error.
        try:
            reply, took = await backend.run(
                prompt, timeout=self.timeout, cwd=self.cwd,
            )
            return Assessment(
                backend=name,
                model=backend.default_model,
                text=reply,
                elapsed_ms=took,
            )
        except Exception as exc:
            msg = f"Stage 1: {name} failed — {exc}"
            logger.warning(msg)
            errors.append(msg)
            return None

    outcomes = await asyncio.gather(
        *(_ask(name, backend) for name, backend in self.backends.items())
    )

    assessments: list[Assessment] = []
    for outcome in outcomes:
        if outcome is not None:
            assessments.append(outcome)

    logger.info(
        "Stage 1: %d/%d backends responded",
        len(assessments), len(self.backends),
    )
    return assessments
|
|
256
|
+
|
|
257
|
+
# ------------------------------------------------------------------
|
|
258
|
+
# Stage 2: Anonymised Peer Review
|
|
259
|
+
# ------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
async def _stage2(
    self,
    original_prompt: str,
    assessments: list[Assessment],
    errors: list[str],
) -> list[PeerReview]:
    """Ask every backend to review and rank the labelled assessments.

    The review prompt shows each assessment under its label only (no
    backend name). A backend that raises is logged, recorded in ``errors``
    and left out of the returned list.
    """
    labelled = [f"**{item.label}:**\n{item.text}" for item in assessments]
    assessments_block = "\n\n---\n\n".join(labelled)

    review_prompt = f"""You are reviewing multiple independent assessments of the same question.

ORIGINAL QUESTION:
{original_prompt}

ASSESSMENTS:

{assessments_block}

YOUR TASK:
1. Evaluate each assessment: strengths, weaknesses, gaps.
2. Identify areas of AGREEMENT across assessments.
3. Identify areas of DISAGREEMENT — which position is more convincing and why.
4. Provide a final ranking from best to worst.

IMPORTANT: End your review with "FINAL RANKING:" followed by a numbered list.
Each line: number, period, space, then the assessment label (e.g. "1. Assessment A")."""

    async def _collect_review(name: str, backend: CLIBackend) -> PeerReview | None:
        # Parsing and model construction stay inside the try so that any
        # failure is reported the same way as a backend error.
        try:
            reply, took = await backend.run(
                review_prompt, timeout=self.timeout, cwd=self.cwd,
            )
            ranking = self._parse_ranking(reply)
            return PeerReview(
                backend=name,
                model=backend.default_model,
                review_text=reply,
                parsed_ranking=ranking,
                elapsed_ms=took,
            )
        except Exception as exc:
            msg = f"Stage 2: {name} failed — {exc}"
            logger.warning(msg)
            errors.append(msg)
            return None

    outcomes = await asyncio.gather(
        *(_collect_review(name, backend) for name, backend in self.backends.items())
    )

    reviews: list[PeerReview] = []
    for outcome in outcomes:
        if outcome is not None:
            reviews.append(outcome)

    logger.info(
        "Stage 2: %d/%d backends reviewed",
        len(reviews), len(self.backends),
    )
    return reviews
|
|
317
|
+
|
|
318
|
+
# ------------------------------------------------------------------
|
|
319
|
+
# Stage 3: Chairman Synthesis
|
|
320
|
+
# ------------------------------------------------------------------
|
|
321
|
+
|
|
322
|
+
async def _stage3(
|
|
323
|
+
self,
|
|
324
|
+
original_prompt: str,
|
|
325
|
+
assessments: list[Assessment],
|
|
326
|
+
peer_reviews: list[PeerReview],
|
|
327
|
+
errors: list[str],
|
|
328
|
+
) -> str:
|
|
329
|
+
assessments_block = "\n\n".join(
|
|
330
|
+
f"**{a.label}** (by {a.backend}):\n{a.text}" for a in assessments
|
|
331
|
+
)
|
|
332
|
+
reviews_block = "\n\n".join(
|
|
333
|
+
f"**Review by {r.backend}:**\n{r.review_text}" for r in peer_reviews
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
chairman_prompt = f"""You are the Chairman of a multi-model council. Multiple AI systems have independently assessed the same question, then peer-reviewed each other's assessments.
|
|
337
|
+
|
|
338
|
+
ORIGINAL QUESTION:
|
|
339
|
+
{original_prompt}
|
|
340
|
+
|
|
341
|
+
STAGE 1 — Individual Assessments:
|
|
342
|
+
{assessments_block}
|
|
343
|
+
|
|
344
|
+
STAGE 2 — Peer Reviews:
|
|
345
|
+
{reviews_block}
|
|
346
|
+
|
|
347
|
+
YOUR TASK AS CHAIRMAN:
|
|
348
|
+
1. Consider all individual assessments and their insights.
|
|
349
|
+
2. Consider the peer reviews and what they reveal about quality and disagreements.
|
|
350
|
+
3. Identify areas of strong consensus vs. genuine disagreement.
|
|
351
|
+
4. Synthesise a SINGLE, comprehensive answer that represents the council's collective wisdom.
|
|
352
|
+
|
|
353
|
+
Where the council agrees, reflect that consensus. Where they disagree, use your judgement to select the most well-reasoned position and explain why."""
|
|
354
|
+
|
|
355
|
+
chairman_backend = self.backends.get(self.chairman_name)
|
|
356
|
+
if chairman_backend is None:
|
|
357
|
+
errors.append(f"Chairman backend {self.chairman_name!r} not available")
|
|
358
|
+
return assessments[0].text if assessments else ""
|
|
359
|
+
|
|
360
|
+
try:
|
|
361
|
+
text, _ = await chairman_backend.run(
|
|
362
|
+
chairman_prompt, timeout=self.timeout * 2, cwd=self.cwd,
|
|
363
|
+
)
|
|
364
|
+
return text
|
|
365
|
+
except Exception as exc:
|
|
366
|
+
msg = f"Stage 3: chairman ({self.chairman_name}) failed — {exc}"
|
|
367
|
+
logger.warning(msg)
|
|
368
|
+
errors.append(msg)
|
|
369
|
+
return assessments[0].text if assessments else ""
|
|
370
|
+
|
|
371
|
+
# ------------------------------------------------------------------
|
|
372
|
+
# Utilities
|
|
373
|
+
# ------------------------------------------------------------------
|
|
374
|
+
|
|
375
|
+
@staticmethod
|
|
376
|
+
def _parse_ranking(text: str) -> list[str]:
|
|
377
|
+
"""Extract assessment ranking from review text."""
|
|
378
|
+
if "FINAL RANKING:" in text:
|
|
379
|
+
section = text.split("FINAL RANKING:", 1)[1]
|
|
380
|
+
numbered = re.findall(r"\d+\.\s*Assessment [A-Z]", section)
|
|
381
|
+
if numbered:
|
|
382
|
+
return [
|
|
383
|
+
re.search(r"Assessment [A-Z]", m).group()
|
|
384
|
+
for m in numbered
|
|
385
|
+
]
|
|
386
|
+
return re.findall(r"Assessment [A-Z]", section)
|
|
387
|
+
return re.findall(r"Assessment [A-Z]", text)
|
|
388
|
+
|
|
389
|
+
def available_backends(self) -> list[str]:
    """List the backend names currently held in ``self.backends``.

    Returns:
        The keys of ``self.backends`` as a new list, in the mapping's
        iteration order. Whether that mapping only ever contains installed
        backends is decided where it is populated, not here.
    """
    names = self.backends.keys()
    return [*names]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Pydantic models for CLI council results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Assessment(BaseModel):
    """A single council member's own assessment, produced in Stage 1."""

    # Backend that produced the assessment, e.g. "gemini", "codex", "claude".
    backend: str
    # Name/ID of the model the backend used.
    model: str
    # Raw text of the response.
    text: str
    # Anonymised label such as "Assessment A"; empty string by default.
    label: str = ""
    # Elapsed time for this assessment (milliseconds, going by the field
    # name); defaults to 0.
    elapsed_ms: int = 0
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PeerReview(BaseModel):
    """A single council member's review of all assessments, produced in Stage 2."""

    # Backend that wrote the review.
    backend: str
    # Model identifier used by that backend.
    model: str
    # Text of the review.
    review_text: str
    # Ranking extracted from the review; each instance gets its own empty
    # list by default. Presumably holds labels like "Assessment A" - confirm
    # against the code that populates it.
    parsed_ranking: list[str] = Field(default_factory=list)
    # Elapsed time for this review (milliseconds, going by the field name);
    # defaults to 0.
    elapsed_ms: int = 0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CouncilMeta(BaseModel):
    """Bookkeeping data describing one council run."""

    # Names of the backends that took part (required, no default).
    backends_used: list[str]
    # Per-stage and overall durations; all default to 0. Unit is presumably
    # milliseconds, as the "_ms" suffix suggests.
    stage1_ms: int = 0
    stage2_ms: int = 0
    stage3_ms: int = 0
    total_ms: int = 0
    # Name of the backend acting as chairman; defaults to "claude".
    chairman_backend: str = "claude"
    # Error messages collected during the run; each instance gets its own
    # empty list by default.
    errors: list[str] = Field(default_factory=list)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CouncilResult(BaseModel):
    """Complete outcome of a council deliberation."""

    # Synthesised answer text (presumably the Stage 3 chairman output -
    # confirm against the code that builds this model).
    synthesis: str
    # Individual Stage 1 assessments.
    assessments: list[Assessment]
    # Stage 2 peer reviews.
    peer_reviews: list[PeerReview]
    # Run metadata (backends, timings, errors).
    meta: CouncilMeta
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
---
# Runs the Claude code-review plugin against pull requests.
name: Claude Code Review

on:
  pull_request:
    types:
      - opened
      - synchronize
      - ready_for_review
      - reopened
    # Optional: restrict the trigger to changes in specific files
    # paths:
    #   - "src/**/*.ts"
    #   - "src/**/*.tsx"
    #   - "src/**/*.js"
    #   - "src/**/*.jsx"

jobs:
  claude-review:
    # Optional: only run for particular PR authors
    # if: |
    #   github.event.pull_request.user.login == 'external-contributor' ||
    #   github.event.pull_request.user.login == 'new-developer' ||
    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'

    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
      issues: read
      id-token: write

    steps:
      # Shallow clone: only the most recent commit is fetched.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Run Claude Code Review
        id: claude-review
        uses: anthropics/claude-code-action@v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
          plugins: 'code-review@claude-code-plugins'
          prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
          # Available options are documented at
          # https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
          # or https://code.claude.com/docs/en/cli-reference
|
|
44
|
+
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
# Runs Claude Code when "@claude" appears in a comment, review, or issue.
name: Claude Code

on:
  issue_comment:
    types:
      - created
  pull_request_review_comment:
    types:
      - created
  issues:
    types:
      - opened
      - assigned
  pull_request_review:
    types:
      - submitted

jobs:
  claude:
    # Proceed only when the triggering comment, review, or issue body/title
    # contains "@claude".
    if: |
      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
      issues: read
      id-token: write
      actions: read  # Required for Claude to read CI results on PRs
    steps:
      # Shallow clone: only the most recent commit is fetched.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Run Claude Code
        id: claude
        uses: anthropics/claude-code-action@v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}

          # Optional setting that lets Claude read CI results on PRs
          additional_permissions: |
            actions: read

          # Optional: supply a custom prompt. When omitted, Claude follows the
          # instructions given in the comment that tagged it.
          # prompt: 'Update the pull request description to include a summary of changes.'

          # Optional: pass claude_args to customize behavior and configuration.
          # Available options are documented at
          # https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
          # or https://code.claude.com/docs/en/cli-reference
          # claude_args: '--allowed-tools Bash(gh pr:*)'
|
|
50
|
+
|