crosscheck-ai 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. crosscheck_ai-1.0.0/LICENSE +14 -0
  2. crosscheck_ai-1.0.0/PKG-INFO +96 -0
  3. crosscheck_ai-1.0.0/README.md +44 -0
  4. crosscheck_ai-1.0.0/crosscheck/__init__.py +34 -0
  5. crosscheck_ai-1.0.0/crosscheck/__main__.py +5 -0
  6. crosscheck_ai-1.0.0/crosscheck/agents/__init__.py +17 -0
  7. crosscheck_ai-1.0.0/crosscheck/agents/analyzer.py +156 -0
  8. crosscheck_ai-1.0.0/crosscheck/agents/coder.py +84 -0
  9. crosscheck_ai-1.0.0/crosscheck/agents/supervisor.py +205 -0
  10. crosscheck_ai-1.0.0/crosscheck/cache.py +366 -0
  11. crosscheck_ai-1.0.0/crosscheck/cli.py +787 -0
  12. crosscheck_ai-1.0.0/crosscheck/cli_extensions.py +368 -0
  13. crosscheck_ai-1.0.0/crosscheck/client.py +189 -0
  14. crosscheck_ai-1.0.0/crosscheck/config.py +275 -0
  15. crosscheck_ai-1.0.0/crosscheck/context_builder.py +327 -0
  16. crosscheck_ai-1.0.0/crosscheck/core.py +317 -0
  17. crosscheck_ai-1.0.0/crosscheck/dashboard.py +460 -0
  18. crosscheck_ai-1.0.0/crosscheck/diff.py +234 -0
  19. crosscheck_ai-1.0.0/crosscheck/local.py +334 -0
  20. crosscheck_ai-1.0.0/crosscheck/models.py +541 -0
  21. crosscheck_ai-1.0.0/crosscheck/monitor.py +216 -0
  22. crosscheck_ai-1.0.0/crosscheck/observer.py +367 -0
  23. crosscheck_ai-1.0.0/crosscheck/policy.py +332 -0
  24. crosscheck_ai-1.0.0/crosscheck/pr_bot.py +470 -0
  25. crosscheck_ai-1.0.0/crosscheck/prompts.py +208 -0
  26. crosscheck_ai-1.0.0/crosscheck/reporter.py +423 -0
  27. crosscheck_ai-1.0.0/crosscheck/sandbox.py +338 -0
  28. crosscheck_ai-1.0.0/crosscheck/streaming.py +316 -0
  29. crosscheck_ai-1.0.0/crosscheck/team/__init__.py +30 -0
  30. crosscheck_ai-1.0.0/crosscheck/team/chat.py +206 -0
  31. crosscheck_ai-1.0.0/crosscheck/team/command_parser.py +161 -0
  32. crosscheck_ai-1.0.0/crosscheck/team/language.py +76 -0
  33. crosscheck_ai-1.0.0/crosscheck/team/roles.py +213 -0
  34. crosscheck_ai-1.0.0/crosscheck/team/session.py +384 -0
  35. crosscheck_ai-1.0.0/crosscheck_ai.egg-info/PKG-INFO +96 -0
  36. crosscheck_ai-1.0.0/crosscheck_ai.egg-info/SOURCES.txt +47 -0
  37. crosscheck_ai-1.0.0/crosscheck_ai.egg-info/dependency_links.txt +1 -0
  38. crosscheck_ai-1.0.0/crosscheck_ai.egg-info/entry_points.txt +2 -0
  39. crosscheck_ai-1.0.0/crosscheck_ai.egg-info/requires.txt +36 -0
  40. crosscheck_ai-1.0.0/crosscheck_ai.egg-info/top_level.txt +1 -0
  41. crosscheck_ai-1.0.0/pyproject.toml +75 -0
  42. crosscheck_ai-1.0.0/setup.cfg +4 -0
  43. crosscheck_ai-1.0.0/tests/test_cli.py +271 -0
  44. crosscheck_ai-1.0.0/tests/test_core.py +196 -0
  45. crosscheck_ai-1.0.0/tests/test_models.py +221 -0
  46. crosscheck_ai-1.0.0/tests/test_observer.py +522 -0
  47. crosscheck_ai-1.0.0/tests/test_phase2.py +595 -0
  48. crosscheck_ai-1.0.0/tests/test_supervisor.py +367 -0
  49. crosscheck_ai-1.0.0/tests/test_team.py +584 -0
@@ -0,0 +1,14 @@
1
+ Copyright (c) 2026-2027 Steddy Nova Srl. All Rights Reserved.
2
+
3
+ PROPRIETARY SOFTWARE LICENSE
4
+
5
+ This software and associated documentation files (the "Software") are the
6
+ exclusive property of Steddy Nova Srl. Unauthorized copying, distribution,
7
+ modification, or use of this Software, in whole or in part, is strictly
8
+ prohibited without prior written consent from Steddy Nova Srl.
9
+
10
+ The Software is provided "AS IS", without warranty of any kind, express or
11
+ implied. In no event shall Steddy Nova Srl be liable for any claim, damages,
12
+ or other liability arising from the use of the Software.
13
+
14
+ For licensing inquiries: info@yuyai.pro
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.4
2
+ Name: crosscheck-ai
3
+ Version: 1.0.0
4
+ Summary: AI Dev Team -- multi-agent coding pipeline where Claude codes and other models advise, debug, review, and plan. Built on OpenRouter.
5
+ Author-email: Steddy Nova Srl <info@yuyai.pro>
6
+ License: Proprietary
7
+ Project-URL: Homepage, https://github.com/Nomadu27/crosscheck-ai
8
+ Project-URL: Repository, https://github.com/Nomadu27/crosscheck-ai
9
+ Project-URL: Issues, https://github.com/Nomadu27/crosscheck-ai/issues
10
+ Keywords: ai,llm,review,openrouter,multi-agent,code-review,insaits,observer
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: Other/Proprietary License
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Software Development :: Quality Assurance
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: click>=8.1
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: httpx>=0.27
25
+ Requires-Dist: pydantic>=2.0
26
+ Requires-Dist: questionary>=2.0
27
+ Requires-Dist: watchdog>=4.0
28
+ Requires-Dist: python-dotenv>=1.0
29
+ Provides-Extra: monitor
30
+ Requires-Dist: insa-its>=1.0; extra == "monitor"
31
+ Provides-Extra: webhook
32
+ Requires-Dist: fastapi>=0.110; extra == "webhook"
33
+ Requires-Dist: uvicorn>=0.29; extra == "webhook"
34
+ Provides-Extra: dashboard
35
+ Requires-Dist: fastapi>=0.110; extra == "dashboard"
36
+ Requires-Dist: uvicorn>=0.29; extra == "dashboard"
37
+ Provides-Extra: policy
38
+ Requires-Dist: pyyaml>=6.0; extra == "policy"
39
+ Provides-Extra: local
40
+ Provides-Extra: all
41
+ Requires-Dist: insa-its>=1.0; extra == "all"
42
+ Requires-Dist: fastapi>=0.110; extra == "all"
43
+ Requires-Dist: uvicorn>=0.29; extra == "all"
44
+ Requires-Dist: pyyaml>=6.0; extra == "all"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=8.0; extra == "dev"
47
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
48
+ Requires-Dist: pytest-mock>=3.14; extra == "dev"
49
+ Requires-Dist: ruff>=0.4; extra == "dev"
50
+ Requires-Dist: mypy>=1.10; extra == "dev"
51
+ Dynamic: license-file
52
+
53
+ # crosscheck-ai
54
+
55
+ **AI Dev Team** — multi-agent coding pipeline where Claude codes and other models advise, debug, review, and plan.
56
+
57
+ Built on OpenRouter. 31 verified models. 229 tests passing.
58
+
59
+ ## Features
60
+
61
+ - **AI Dev Team**: Claude writes code, GPT-5 architects, DeepSeek debugs, Grok scans security, Gemini analyzes flow
62
+ - **Group Chat UI**: Talk to all agents at once via web-based dialog panel
63
+ - **Code Review**: Multi-agent review with dual-supervisor voting
64
+ - **Observer Mode**: Watch external coding sessions, flag bugs in real-time
65
+ - **@Mentions**: Route tasks to specific agents (`@coder fix line 45`, `@security scan for vulns`)
66
+ - **20+ Languages**: Auto-detect user language, respond in same language
67
+
68
+ ## Install
69
+
70
+ ```bash
71
+ pip install crosscheck-ai
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ```bash
77
+ export CROSSCHECK_API_KEY=sk-or-...
78
+
79
+ # Launch AI Dev Team
80
+ crosscheck team -t "Add authentication" -f app.py
81
+
82
+ # Open Group Chat UI
83
+ crosscheck chat --port 8080
84
+
85
+ # Code review
86
+ crosscheck review mycode.py
87
+
88
+ # Observer mode
89
+ crosscheck observe watch ./src
90
+ ```
91
+
92
+ ## License
93
+
94
+ Copyright (c) 2026-2027 Steddy Nova Srl. All Rights Reserved.
95
+
96
+ Contact: info@yuyai.pro
@@ -0,0 +1,44 @@
1
+ # crosscheck-ai
2
+
3
+ **AI Dev Team** — multi-agent coding pipeline where Claude codes and other models advise, debug, review, and plan.
4
+
5
+ Built on OpenRouter. 31 verified models. 229 tests passing.
6
+
7
+ ## Features
8
+
9
+ - **AI Dev Team**: Claude writes code, GPT-5 architects, DeepSeek debugs, Grok scans security, Gemini analyzes flow
10
+ - **Group Chat UI**: Talk to all agents at once via web-based dialog panel
11
+ - **Code Review**: Multi-agent review with dual-supervisor voting
12
+ - **Observer Mode**: Watch external coding sessions, flag bugs in real-time
13
+ - **@Mentions**: Route tasks to specific agents (`@coder fix line 45`, `@security scan for vulns`)
14
+ - **20+ Languages**: Auto-detect user language, respond in same language
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install crosscheck-ai
20
+ ```
21
+
22
+ ## Quick Start
23
+
24
+ ```bash
25
+ export CROSSCHECK_API_KEY=sk-or-...
26
+
27
+ # Launch AI Dev Team
28
+ crosscheck team -t "Add authentication" -f app.py
29
+
30
+ # Open Group Chat UI
31
+ crosscheck chat --port 8080
32
+
33
+ # Code review
34
+ crosscheck review mycode.py
35
+
36
+ # Observer mode
37
+ crosscheck observe watch ./src
38
+ ```
39
+
40
+ ## License
41
+
42
+ Copyright (c) 2026-2027 Steddy Nova Srl. All Rights Reserved.
43
+
44
+ Contact: info@yuyai.pro
@@ -0,0 +1,34 @@
1
+ """
2
+ crosscheck-ai -- Multi-agent AI review & coding pipeline via OpenRouter.
3
+ """
4
+ __version__ = "0.3.0"
5
+
6
+ from crosscheck.agents.supervisor import SupervisorAgent, DecomposeResult, SynthesisResult
7
+ from crosscheck.agents.analyzer import AnalyzerPool, AnalyzerReport
8
+ from crosscheck.agents.coder import CoderAgent
9
+ from crosscheck.core import MultiAgentSession, SessionResult, Round
10
+ from crosscheck.config import CrosscheckConfig, load_config
11
+ from crosscheck.models import Mode, Task, Tier, ModelSpec, REGISTRY
12
+ from crosscheck.monitor import CrosscheckMonitor, NoOpMonitor, AnomalyEvent
13
+ from crosscheck.observer import ObserverSession, ObserverResult, ObserverFlag, FolderWatcher
14
+ from crosscheck.client import OpenRouterClient
15
+
16
+ __all__ = [
17
+ "__version__",
18
+ # Agents
19
+ "SupervisorAgent", "DecomposeResult", "SynthesisResult",
20
+ "AnalyzerPool", "AnalyzerReport",
21
+ "CoderAgent",
22
+ # Core
23
+ "MultiAgentSession", "SessionResult", "Round",
24
+ # Config
25
+ "CrosscheckConfig", "load_config",
26
+ # Models
27
+ "Mode", "Task", "Tier", "ModelSpec", "REGISTRY",
28
+ # Monitor
29
+ "CrosscheckMonitor", "NoOpMonitor", "AnomalyEvent",
30
+ # Observer
31
+ "ObserverSession", "ObserverResult", "ObserverFlag", "FolderWatcher",
32
+ # Client
33
+ "OpenRouterClient",
34
+ ]
@@ -0,0 +1,5 @@
1
+ """Allow running crosscheck as: python -m crosscheck"""
2
+ from crosscheck.cli import cli
3
+
4
# Run the CLI only when this module is executed directly (for example via
# `python -m crosscheck`); importing it has no side effect beyond the import.
if __name__ == "__main__":
    cli()
@@ -0,0 +1,17 @@
1
+ """
2
+ crosscheck.agents
3
+ -----------------
4
+ Agent subpackage: supervisor, analyzer, coder.
5
+ """
6
+ from crosscheck.agents.supervisor import SupervisorAgent, DecomposeResult, SynthesisResult
7
+ from crosscheck.agents.analyzer import AnalyzerPool, AnalyzerReport
8
+ from crosscheck.agents.coder import CoderAgent
9
+
10
+ __all__ = [
11
+ "SupervisorAgent",
12
+ "DecomposeResult",
13
+ "SynthesisResult",
14
+ "AnalyzerPool",
15
+ "AnalyzerReport",
16
+ "CoderAgent",
17
+ ]
@@ -0,0 +1,156 @@
1
+ """
2
+ crosscheck.agents.analyzer
3
+ ---------------------------
4
+ Analyzer agent pool — runs 3-4 models in parallel, each probing a different
5
+ angle of the content. Results are structured AnalyzerReport objects.
6
+
7
+ Each analyzer gets a specific task from the Supervisor's decomposition.
8
+ If a model errors, it returns an ERROR report rather than crashing the session.
9
+ Prior round feedback is passed in so analyzers can see if their issues were fixed.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import json
16
+ from dataclasses import dataclass, field
17
+ from typing import Optional
18
+
19
+ from crosscheck.client import OpenRouterClient
20
+ from crosscheck.prompts import ANALYZER_SYSTEM, analyzer_prompt
21
+
22
+
23
@dataclass
class AnalyzerReport:
    """Structured outcome of one analyzer model probing one angle.

    Field order is part of the constructor interface; the two trailing
    fields are optional and default to an empty dict and ``None``.
    """

    model_id: str  # ID of the model that produced (or failed to produce) the report
    angle: str  # the angle this analyzer was asked to probe
    score: float  # numeric score; the pool records 0.0 for a failed analyzer
    verdict: str  # "PASS" | "ISSUES_FOUND" | "ERROR"
    issues: list[dict]  # [{severity, location, description, fix}]
    positive_findings: list[str]
    summary: str
    raw: dict = field(default_factory=dict)  # parsed payload the report was built from
    error: Optional[str] = None  # exception text when the analyzer failed
34
+
35
+
36
class AnalyzerPool:
    """Run N analyzer models concurrently, one task per model.

    Every configured model receives one task (an ``angle`` plus an
    ``instruction``) and its JSON reply is turned into an ``AnalyzerReport``.
    A failing model yields an ``ERROR`` report instead of aborting the run.
    """

    def __init__(
        self,
        client: OpenRouterClient,
        models: list[str],
    ):
        """Store the chat client and the model IDs to fan out to.

        Raises:
            ValueError: if ``models`` is empty.
        """
        if not models:
            raise ValueError("AnalyzerPool requires at least one model ID.")
        self.client = client
        self.models = models

    async def analyze(
        self,
        content: str,
        tasks: list[dict],  # from Supervisor.decompose()
        round_num: int,
        prior_reports: Optional[list[AnalyzerReport]] = None,
    ) -> list[AnalyzerReport]:
        """Run all analyzers in parallel and collect one report per model.

        Models are paired with tasks by index; when there are more models
        than tasks the task list is cycled, and when ``tasks`` is empty a
        generic task is used. If ``prior_reports`` contains a report from the
        same model with issues, its score and first five issues are passed
        along as feedback.

        Returns:
            One ``AnalyzerReport`` per configured model, in model order.
            Failures are converted to reports with verdict ``"ERROR"``.
        """
        pairs: list[tuple[str, dict, str]] = []
        for index, model in enumerate(self.models):
            if tasks:
                task = tasks[index % len(tasks)]
            else:
                task = {"angle": "General", "instruction": "Analyze thoroughly."}
            pairs.append((model, task, self._prior_feedback(model, prior_reports)))

        results = await asyncio.gather(
            *(
                self._single(model, task, content, round_num, prior)
                for model, task, prior in pairs
            ),
            return_exceptions=True,
        )

        reports: list[AnalyzerReport] = []
        for index, ((model_id, task, _prior), result) in enumerate(zip(pairs, results)):
            # BaseException (not Exception): with return_exceptions=True a
            # cancelled child is returned as a CancelledError instance, which
            # must not be mistaken for a report.
            if not isinstance(result, BaseException):
                reports.append(result)
                continue

            fallback_angle = f"Angle {index + 1}"
            # A malformed (non-dict) task must not crash the error path.
            angle = task.get("angle", fallback_angle) if isinstance(task, dict) else fallback_angle
            reports.append(AnalyzerReport(
                model_id=model_id,
                angle=angle,
                score=0.0,
                verdict="ERROR",
                issues=[],
                positive_findings=[],
                summary="Analyzer failed to respond.",
                error=str(result),
            ))

        return reports

    @staticmethod
    def _prior_feedback(model: str, prior_reports: Optional[list[AnalyzerReport]]) -> str:
        """Summarise this model's previous-round report, or return ""."""
        if not prior_reports:
            return ""
        previous = next((r for r in prior_reports if r.model_id == model), None)
        if previous and previous.issues:
            return (
                f"Prior round score: {previous.score}/10. "
                f"Prior issues: {json.dumps(previous.issues[:5])}"
            )
        return ""

    async def _single(
        self,
        model: str,
        task: dict,
        content: str,
        round_num: int,
        prior: str,
    ) -> AnalyzerReport:
        """Query one model for one task and build its report.

        Any exception (transport, parsing, bad score value) propagates to
        the caller, which converts it into an ``ERROR`` report.
        """
        angle = task.get("angle", "General")
        instruction = task.get("instruction", "Analyze the content thoroughly.")

        prompt = analyzer_prompt(
            angle=angle,
            instruction=instruction,
            content=content,
            round_num=round_num,
            prior_feedback=prior,
        )

        raw = await self.client.chat(
            model=model,
            messages=[
                {"role": "system", "content": ANALYZER_SYSTEM},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            json_mode=True,
        )

        parsed = self._parse(raw, model)
        return AnalyzerReport(
            model_id=model,
            angle=parsed.get("angle", angle),
            score=float(parsed.get("score", 0)),
            verdict=parsed.get("verdict", "ISSUES_FOUND"),
            issues=parsed.get("issues", []),
            positive_findings=parsed.get("positive_findings", []),
            summary=parsed.get("summary", ""),
            raw=parsed,
        )

    @staticmethod
    def _parse(raw: str, model: str) -> dict:
        """Decode a model reply into a JSON object.

        Tries, in order: the reply as-is, the reply with a leading/trailing
        Markdown code fence removed, and the span from the first ``{`` to the
        last ``}`` (covers replies that wrap the JSON in prose).

        Raises:
            ValueError: if the reply is not a string or no candidate decodes
                to a JSON object.
        """
        if not isinstance(raw, str):
            raise ValueError(
                f"Analyzer {model}: JSON parse failed — expected text, "
                f"got {type(raw).__name__}"
            )

        text = raw.strip()
        candidates = [
            text,
            text.removeprefix("```json").removeprefix("```").removesuffix("```").strip(),
        ]
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            candidates.append(text[start:end + 1])

        failure: Exception = ValueError("empty response")
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except (ValueError, RecursionError) as exc:
                failure = exc
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object: callers rely on dict access.
            failure = TypeError(f"expected a JSON object, got {type(parsed).__name__}")

        raise ValueError(
            f"Analyzer {model}: JSON parse failed — {failure}\n"
            f"Raw (first 300 chars): {raw[:300]}"
        ) from failure
@@ -0,0 +1,84 @@
1
+ """
2
+ crosscheck.agents.coder
3
+ ------------------------
4
+ Coder agent — the final implementer in the pipeline.
5
+
6
+ DESIGN: Any OpenRouter model can be Coder. The user chooses via
7
+ --interactive, --coder flag, or named profile. Anthropic is the
8
+ default in presets but is NOT enforced. DeepSeek, Grok, Kimi, etc.
9
+ are all valid choices.
10
+
11
+ The coder receives the original content, supervisor instructions,
12
+ and all critical/major issues, then returns the complete revised content.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from crosscheck.client import OpenRouterClient
18
+ from crosscheck.models import Task
19
+ from crosscheck.prompts import CODER_SYSTEM, coder_prompt
20
+
21
+
22
class CoderAgent:
    """Wrap a single model as the Coder; any model ID is accepted."""

    def __init__(
        self,
        client: OpenRouterClient,
        model: str,
        task: Task,
    ):
        """Store the chat client, the coder model ID and the session task.

        Raises:
            ValueError: if ``model`` is empty.
        """
        if not model:
            raise ValueError("CoderAgent requires a non-empty model ID.")
        self.client = client
        self.model = model
        self.task = task

    async def revise(
        self,
        content: str,
        instructions: str,
        issues: list[dict],
        round_num: int,
        max_tokens: int = 8192,
    ) -> str:
        """Ask the coder model for a complete revision of ``content``.

        The issues are rendered as a numbered list and sent together with
        the supervisor ``instructions``. JSON mode is disabled because the
        reply is the revised content itself.

        Returns:
            The model reply with surrounding whitespace stripped.
        """
        prompt = coder_prompt(
            task=self.task,
            content=content,
            instructions=instructions,
            issues=self._format_issues(issues),
            round_num=round_num,
        )

        revised = await self.client.chat(
            model=self.model,
            messages=[
                {"role": "system", "content": CODER_SYSTEM},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_tokens,
            temperature=0.2,
            json_mode=False,  # raw content output, never JSON
        )

        return revised.strip()

    @staticmethod
    def _format_issues(issues: list[dict]) -> str:
        """Render issues as a numbered, severity-tagged list for the prompt.

        Missing or ``None`` fields are tolerated: severity falls back to
        ``"major"``, and an absent location, description or fix is omitted
        rather than rendered as the text "None".
        """
        if not issues:
            return "No specific issues flagged — improve overall quality and clarity."

        lines = []
        for number, issue in enumerate(issues, 1):
            # `or` (not a .get default) so an explicit JSON null also falls
            # back; str() guards against non-string severities.
            severity = str(issue.get("severity") or "major").upper()
            location = issue.get("location") or ""
            description = issue.get("description") or ""
            fix = issue.get("fix") or ""

            location_tag = f" [{location}]" if location else ""
            lines.append(f"{number}. [{severity}]{location_tag} {description}")
            if fix:
                lines.append(f" → Fix: {fix}")
        return "\n".join(lines)
@@ -0,0 +1,205 @@
1
+ """
2
+ crosscheck.agents.supervisor
3
+ -----------------------------
4
+ Supervisor agent — orchestrates the session, decomposes tasks, synthesizes
5
+ analyzer reports, and makes APPROVED / REVISE decisions.
6
+
7
+ DUAL SUPERVISOR VOTING (when 2 models configured):
8
+ Both run independently on every synthesis call. Rules:
9
+ - Both APPROVED → APPROVED (unanimous)
10
+ - Both REVISE → REVISE (unanimous)
11
+ - Disagreement + both scores ≥ 8.5 → APPROVED (near-perfect exception)
12
+ - Disagreement + any score < 8.5 → REVISE (conservative, safe default)
13
+ Critical issues and coder instructions are merged from both.
14
+ consensus=False is recorded in the result for visibility.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import json
21
+ from dataclasses import dataclass, field
22
+ from typing import Optional
23
+
24
+ from crosscheck.client import OpenRouterClient
25
+ from crosscheck.models import Task
26
+ from crosscheck.prompts import (
27
+ SUPERVISOR_SYSTEM,
28
+ supervisor_decompose,
29
+ supervisor_synthesize,
30
+ )
31
+
32
+
33
@dataclass
class DecomposeResult:
    """Output of the supervisor's decomposition step."""

    session_goal: str  # goal text taken from the supervisor reply ("" if absent)
    analyzer_tasks: list[dict]  # [{id, angle, instruction}]
37
+
38
+
39
@dataclass
class SynthesisResult:
    """Verdict produced by one supervisor, or reconciled from two."""

    verdict: str  # "APPROVED" | "REVISE"
    overall_score: float  # for a reconciled result: mean of both scores, 1 decimal
    summary: str
    critical_issues: list[str]
    coder_instructions: str
    approved_aspects: list[str]
    raw: dict = field(default_factory=dict)  # parsed supervisor payload(s)
    supervisor_votes: list[str] = field(default_factory=list)  # one verdict per supervisor
    consensus: bool = True  # False when two supervisors disagreed
50
+
51
+
52
class SupervisorAgent:
    """Wrap one or two supervisor models.

    With two models, both synthesize in parallel and their verdicts are
    reconciled. Any model ID is accepted; models beyond the second are
    ignored.
    """

    def __init__(
        self,
        client: OpenRouterClient,
        models: list[str],
        task: Task,
        max_rounds: int,
    ):
        """Store the client, up to two supervisor model IDs, and session limits.

        Raises:
            ValueError: if ``models`` is empty.
        """
        if not models:
            raise ValueError("SupervisorAgent requires at least one model ID.")
        self.client = client
        self.models = models[:2]  # hard cap at 2
        self.task = task
        self.max_rounds = max_rounds

    # ── Public API ───────────────────────────────────────────────────────

    async def decompose(self, content: str, round_num: int) -> DecomposeResult:
        """Have the primary (first) supervisor split the work into analyzer tasks."""
        prompt = supervisor_decompose(self.task, content, round_num, self.max_rounds)
        raw = await self._call(self.models[0], prompt)
        parsed = self._parse_json(raw, "decompose")
        return DecomposeResult(
            session_goal=parsed.get("session_goal", ""),
            analyzer_tasks=parsed.get("analyzer_tasks", []),
        )

    async def synthesize(
        self,
        analyzer_reports: list[dict],
        round_num: int,
    ) -> SynthesisResult:
        """Turn analyzer reports into a final verdict.

        With a single supervisor its result is returned directly. With two,
        both run concurrently on the same prompt and the results are merged
        by ``_reconcile``.
        """
        reports_text = json.dumps(analyzer_reports, indent=2)
        prompt = supervisor_synthesize(reports_text, round_num)

        if len(self.models) == 1:
            return await self._single_synthesis(self.models[0], prompt)

        # Dual: parallel execution
        first, second = await asyncio.gather(
            self._single_synthesis(self.models[0], prompt),
            self._single_synthesis(self.models[1], prompt),
        )
        return self._reconcile(first, second)

    # ── Private ───────────────────────────────────────────────────────────

    async def _single_synthesis(self, model: str, prompt: str) -> SynthesisResult:
        """Run one supervisor and map its JSON reply onto a SynthesisResult.

        Missing *or null* fields fall back to conservative defaults
        (verdict ``"REVISE"``, score 0, empty text/lists) so that a sloppy
        reply cannot crash score conversion or later list merging.
        """
        raw = await self._call(model, prompt)
        parsed = self._parse_json(raw, f"synthesize[{model.split('/')[-1]}]")
        verdict = parsed.get("verdict") or "REVISE"
        return SynthesisResult(
            verdict=verdict,
            overall_score=float(parsed.get("overall_score") or 0),
            summary=parsed.get("summary") or "",
            critical_issues=parsed.get("critical_issues") or [],
            coder_instructions=parsed.get("coder_instructions") or "",
            approved_aspects=parsed.get("approved_aspects") or [],
            raw=parsed,
            supervisor_votes=[verdict],
            consensus=True,
        )

    @staticmethod
    def _dedupe(items: list) -> list:
        """Drop repeated entries, keeping first occurrences in order.

        Uses equality rather than hashing so that unhashable entries (for
        example issues returned as objects instead of strings) are accepted.
        """
        unique: list = []
        for item in items:
            if item not in unique:
                unique.append(item)
        return unique

    @staticmethod
    def _reconcile(r1: SynthesisResult, r2: SynthesisResult) -> SynthesisResult:
        """Reconcile two supervisor verdicts into one result.

        Unanimous verdicts are kept as-is. On a split vote the result is
        ``"APPROVED"`` only when both scores are at least 8.5; otherwise it
        is ``"REVISE"``. A split is always recorded as ``consensus=False``.
        Narrative fields come from the higher-scoring supervisor, issue and
        approved-aspect lists are merged without duplicates, and the overall
        score is the mean of both scores rounded to one decimal.
        """
        votes = [r1.verdict, r2.verdict]
        consensus = r1.verdict == r2.verdict

        if consensus:
            final_verdict = r1.verdict
        elif r1.overall_score >= 8.5 and r2.overall_score >= 8.5:
            # Both nearly perfect — allow APPROVED despite the split.
            final_verdict = "APPROVED"
        else:
            # Safe default: if in doubt, revise.
            final_verdict = "REVISE"

        # Use the higher-scoring result as primary for narrative fields
        # (ties go to the first supervisor).
        if r1.overall_score >= r2.overall_score:
            primary, secondary = r1, r2
        else:
            primary, secondary = r2, r1

        # Secondary instructions are only relevant when a revision follows.
        merged_instructions = primary.coder_instructions
        if final_verdict == "REVISE" and secondary.coder_instructions:
            merged_instructions = (
                f"{primary.coder_instructions}\n\n"
                f"[Supervisor 2 additions]\n{secondary.coder_instructions}"
            ).strip()

        return SynthesisResult(
            verdict=final_verdict,
            overall_score=round((r1.overall_score + r2.overall_score) / 2, 1),
            summary=primary.summary,
            critical_issues=SupervisorAgent._dedupe(
                list(primary.critical_issues) + list(secondary.critical_issues)
            ),
            coder_instructions=merged_instructions,
            approved_aspects=SupervisorAgent._dedupe(
                list(primary.approved_aspects) + list(secondary.approved_aspects)
            ),
            raw={"supervisor_1": r1.raw, "supervisor_2": r2.raw},
            supervisor_votes=votes,
            consensus=consensus,
        )

    async def _call(self, model: str, user_prompt: str) -> str:
        """Send the supervisor system prompt plus ``user_prompt`` in JSON mode."""
        return await self.client.chat(
            model=model,
            messages=[
                {"role": "system", "content": SUPERVISOR_SYSTEM},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            json_mode=True,
        )

    @staticmethod
    def _parse_json(raw: str, context: str) -> dict:
        """Decode a supervisor reply into a JSON object.

        Tries, in order: the reply as-is, the reply with a leading/trailing
        Markdown code fence removed, and the span from the first ``{`` to the
        last ``}`` (covers replies that wrap the JSON in prose).

        Raises:
            ValueError: if the reply is not a string or no candidate decodes
                to a JSON object.
        """
        if not isinstance(raw, str):
            raise ValueError(
                f"Supervisor {context}: JSON parse failed — expected text, "
                f"got {type(raw).__name__}"
            )

        text = raw.strip()
        candidates = [
            text,
            text.removeprefix("```json").removeprefix("```").removesuffix("```").strip(),
        ]
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            candidates.append(text[start:end + 1])

        failure: Exception = ValueError("empty response")
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except (ValueError, RecursionError) as exc:
                failure = exc
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object: callers rely on dict access.
            failure = TypeError(f"expected a JSON object, got {type(parsed).__name__}")

        raise ValueError(
            f"Supervisor {context}: JSON parse failed — {failure}\n"
            f"Raw (first 400 chars): {raw[:400]}"
        ) from failure