crosscheck-ai 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crosscheck_ai-1.0.0/LICENSE +14 -0
- crosscheck_ai-1.0.0/PKG-INFO +96 -0
- crosscheck_ai-1.0.0/README.md +44 -0
- crosscheck_ai-1.0.0/crosscheck/__init__.py +34 -0
- crosscheck_ai-1.0.0/crosscheck/__main__.py +5 -0
- crosscheck_ai-1.0.0/crosscheck/agents/__init__.py +17 -0
- crosscheck_ai-1.0.0/crosscheck/agents/analyzer.py +156 -0
- crosscheck_ai-1.0.0/crosscheck/agents/coder.py +84 -0
- crosscheck_ai-1.0.0/crosscheck/agents/supervisor.py +205 -0
- crosscheck_ai-1.0.0/crosscheck/cache.py +366 -0
- crosscheck_ai-1.0.0/crosscheck/cli.py +787 -0
- crosscheck_ai-1.0.0/crosscheck/cli_extensions.py +368 -0
- crosscheck_ai-1.0.0/crosscheck/client.py +189 -0
- crosscheck_ai-1.0.0/crosscheck/config.py +275 -0
- crosscheck_ai-1.0.0/crosscheck/context_builder.py +327 -0
- crosscheck_ai-1.0.0/crosscheck/core.py +317 -0
- crosscheck_ai-1.0.0/crosscheck/dashboard.py +460 -0
- crosscheck_ai-1.0.0/crosscheck/diff.py +234 -0
- crosscheck_ai-1.0.0/crosscheck/local.py +334 -0
- crosscheck_ai-1.0.0/crosscheck/models.py +541 -0
- crosscheck_ai-1.0.0/crosscheck/monitor.py +216 -0
- crosscheck_ai-1.0.0/crosscheck/observer.py +367 -0
- crosscheck_ai-1.0.0/crosscheck/policy.py +332 -0
- crosscheck_ai-1.0.0/crosscheck/pr_bot.py +470 -0
- crosscheck_ai-1.0.0/crosscheck/prompts.py +208 -0
- crosscheck_ai-1.0.0/crosscheck/reporter.py +423 -0
- crosscheck_ai-1.0.0/crosscheck/sandbox.py +338 -0
- crosscheck_ai-1.0.0/crosscheck/streaming.py +316 -0
- crosscheck_ai-1.0.0/crosscheck/team/__init__.py +30 -0
- crosscheck_ai-1.0.0/crosscheck/team/chat.py +206 -0
- crosscheck_ai-1.0.0/crosscheck/team/command_parser.py +161 -0
- crosscheck_ai-1.0.0/crosscheck/team/language.py +76 -0
- crosscheck_ai-1.0.0/crosscheck/team/roles.py +213 -0
- crosscheck_ai-1.0.0/crosscheck/team/session.py +384 -0
- crosscheck_ai-1.0.0/crosscheck_ai.egg-info/PKG-INFO +96 -0
- crosscheck_ai-1.0.0/crosscheck_ai.egg-info/SOURCES.txt +47 -0
- crosscheck_ai-1.0.0/crosscheck_ai.egg-info/dependency_links.txt +1 -0
- crosscheck_ai-1.0.0/crosscheck_ai.egg-info/entry_points.txt +2 -0
- crosscheck_ai-1.0.0/crosscheck_ai.egg-info/requires.txt +36 -0
- crosscheck_ai-1.0.0/crosscheck_ai.egg-info/top_level.txt +1 -0
- crosscheck_ai-1.0.0/pyproject.toml +75 -0
- crosscheck_ai-1.0.0/setup.cfg +4 -0
- crosscheck_ai-1.0.0/tests/test_cli.py +271 -0
- crosscheck_ai-1.0.0/tests/test_core.py +196 -0
- crosscheck_ai-1.0.0/tests/test_models.py +221 -0
- crosscheck_ai-1.0.0/tests/test_observer.py +522 -0
- crosscheck_ai-1.0.0/tests/test_phase2.py +595 -0
- crosscheck_ai-1.0.0/tests/test_supervisor.py +367 -0
- crosscheck_ai-1.0.0/tests/test_team.py +584 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Copyright (c) 2026-2027 Steddy Nova Srl. All Rights Reserved.
|
|
2
|
+
|
|
3
|
+
PROPRIETARY SOFTWARE LICENSE
|
|
4
|
+
|
|
5
|
+
This software and associated documentation files (the "Software") are the
|
|
6
|
+
exclusive property of Steddy Nova Srl. Unauthorized copying, distribution,
|
|
7
|
+
modification, or use of this Software, in whole or in part, is strictly
|
|
8
|
+
prohibited without prior written consent from Steddy Nova Srl.
|
|
9
|
+
|
|
10
|
+
The Software is provided "AS IS", without warranty of any kind, express or
|
|
11
|
+
implied. In no event shall Steddy Nova Srl be liable for any claim, damages,
|
|
12
|
+
or other liability arising from the use of the Software.
|
|
13
|
+
|
|
14
|
+
For licensing inquiries: info@yuyai.pro
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: crosscheck-ai
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: AI Dev Team -- multi-agent coding pipeline where Claude codes and other models advise, debug, review, and plan. Built on OpenRouter.
|
|
5
|
+
Author-email: Steddy Nova Srl <info@yuyai.pro>
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Project-URL: Homepage, https://github.com/Nomadu27/crosscheck-ai
|
|
8
|
+
Project-URL: Repository, https://github.com/Nomadu27/crosscheck-ai
|
|
9
|
+
Project-URL: Issues, https://github.com/Nomadu27/crosscheck-ai/issues
|
|
10
|
+
Keywords: ai,llm,review,openrouter,multi-agent,code-review,insaits,observer
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: Other/Proprietary License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: click>=8.1
|
|
23
|
+
Requires-Dist: rich>=13.0
|
|
24
|
+
Requires-Dist: httpx>=0.27
|
|
25
|
+
Requires-Dist: pydantic>=2.0
|
|
26
|
+
Requires-Dist: questionary>=2.0
|
|
27
|
+
Requires-Dist: watchdog>=4.0
|
|
28
|
+
Requires-Dist: python-dotenv>=1.0
|
|
29
|
+
Provides-Extra: monitor
|
|
30
|
+
Requires-Dist: insa-its>=1.0; extra == "monitor"
|
|
31
|
+
Provides-Extra: webhook
|
|
32
|
+
Requires-Dist: fastapi>=0.110; extra == "webhook"
|
|
33
|
+
Requires-Dist: uvicorn>=0.29; extra == "webhook"
|
|
34
|
+
Provides-Extra: dashboard
|
|
35
|
+
Requires-Dist: fastapi>=0.110; extra == "dashboard"
|
|
36
|
+
Requires-Dist: uvicorn>=0.29; extra == "dashboard"
|
|
37
|
+
Provides-Extra: policy
|
|
38
|
+
Requires-Dist: pyyaml>=6.0; extra == "policy"
|
|
39
|
+
Provides-Extra: local
|
|
40
|
+
Provides-Extra: all
|
|
41
|
+
Requires-Dist: insa-its>=1.0; extra == "all"
|
|
42
|
+
Requires-Dist: fastapi>=0.110; extra == "all"
|
|
43
|
+
Requires-Dist: uvicorn>=0.29; extra == "all"
|
|
44
|
+
Requires-Dist: pyyaml>=6.0; extra == "all"
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
47
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest-mock>=3.14; extra == "dev"
|
|
49
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
50
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
51
|
+
Dynamic: license-file
|
|
52
|
+
|
|
53
|
+
# crosscheck-ai
|
|
54
|
+
|
|
55
|
+
**AI Dev Team** — multi-agent coding pipeline where Claude codes and other models advise, debug, review, and plan.
|
|
56
|
+
|
|
57
|
+
Built on OpenRouter. 31 verified models. 229 tests passing.
|
|
58
|
+
|
|
59
|
+
## Features
|
|
60
|
+
|
|
61
|
+
- **AI Dev Team**: Claude writes code, GPT-5 architects, DeepSeek debugs, Grok scans security, Gemini analyzes flow
|
|
62
|
+
- **Group Chat UI**: Talk to all agents at once via web-based dialog panel
|
|
63
|
+
- **Code Review**: Multi-agent review with dual-supervisor voting
|
|
64
|
+
- **Observer Mode**: Watch external coding sessions, flag bugs in real-time
|
|
65
|
+
- **@Mentions**: Route tasks to specific agents (`@coder fix line 45`, `@security scan for vulns`)
|
|
66
|
+
- **20+ Languages**: Auto-detects the user's language and responds in the same language
|
|
67
|
+
|
|
68
|
+
## Install
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install crosscheck-ai
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
export CROSSCHECK_API_KEY=sk-or-...
|
|
78
|
+
|
|
79
|
+
# Launch AI Dev Team
|
|
80
|
+
crosscheck team -t "Add authentication" -f app.py
|
|
81
|
+
|
|
82
|
+
# Open Group Chat UI
|
|
83
|
+
crosscheck chat --port 8080
|
|
84
|
+
|
|
85
|
+
# Code review
|
|
86
|
+
crosscheck review mycode.py
|
|
87
|
+
|
|
88
|
+
# Observer mode
|
|
89
|
+
crosscheck observe watch ./src
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
Copyright (c) 2026-2027 Steddy Nova Srl. All Rights Reserved.
|
|
95
|
+
|
|
96
|
+
Contact: info@yuyai.pro
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# crosscheck-ai
|
|
2
|
+
|
|
3
|
+
**AI Dev Team** — multi-agent coding pipeline where Claude codes and other models advise, debug, review, and plan.
|
|
4
|
+
|
|
5
|
+
Built on OpenRouter. 31 verified models. 229 tests passing.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **AI Dev Team**: Claude writes code, GPT-5 architects, DeepSeek debugs, Grok scans security, Gemini analyzes flow
|
|
10
|
+
- **Group Chat UI**: Talk to all agents at once via web-based dialog panel
|
|
11
|
+
- **Code Review**: Multi-agent review with dual-supervisor voting
|
|
12
|
+
- **Observer Mode**: Watch external coding sessions, flag bugs in real-time
|
|
13
|
+
- **@Mentions**: Route tasks to specific agents (`@coder fix line 45`, `@security scan for vulns`)
|
|
14
|
+
- **20+ Languages**: Auto-detects the user's language and responds in the same language
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install crosscheck-ai
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
export CROSSCHECK_API_KEY=sk-or-...
|
|
26
|
+
|
|
27
|
+
# Launch AI Dev Team
|
|
28
|
+
crosscheck team -t "Add authentication" -f app.py
|
|
29
|
+
|
|
30
|
+
# Open Group Chat UI
|
|
31
|
+
crosscheck chat --port 8080
|
|
32
|
+
|
|
33
|
+
# Code review
|
|
34
|
+
crosscheck review mycode.py
|
|
35
|
+
|
|
36
|
+
# Observer mode
|
|
37
|
+
crosscheck observe watch ./src
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## License
|
|
41
|
+
|
|
42
|
+
Copyright (c) 2026-2027 Steddy Nova Srl. All Rights Reserved.
|
|
43
|
+
|
|
44
|
+
Contact: info@yuyai.pro
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
crosscheck-ai -- Multi-agent AI review & coding pipeline via OpenRouter.
|
|
3
|
+
"""
|
|
4
|
+
# Must match the distribution version published in the package metadata
# (PKG-INFO declares "Version: 1.0.0"); the previous literal was stale.
__version__ = "1.0.0"
|
|
5
|
+
|
|
6
|
+
from crosscheck.agents.supervisor import SupervisorAgent, DecomposeResult, SynthesisResult
|
|
7
|
+
from crosscheck.agents.analyzer import AnalyzerPool, AnalyzerReport
|
|
8
|
+
from crosscheck.agents.coder import CoderAgent
|
|
9
|
+
from crosscheck.core import MultiAgentSession, SessionResult, Round
|
|
10
|
+
from crosscheck.config import CrosscheckConfig, load_config
|
|
11
|
+
from crosscheck.models import Mode, Task, Tier, ModelSpec, REGISTRY
|
|
12
|
+
from crosscheck.monitor import CrosscheckMonitor, NoOpMonitor, AnomalyEvent
|
|
13
|
+
from crosscheck.observer import ObserverSession, ObserverResult, ObserverFlag, FolderWatcher
|
|
14
|
+
from crosscheck.client import OpenRouterClient
|
|
15
|
+
|
|
16
|
+
# Public API re-exported from the package root, grouped by originating module.
__all__ = [
    "__version__",
    # Agents
    "SupervisorAgent",
    "DecomposeResult",
    "SynthesisResult",
    "AnalyzerPool",
    "AnalyzerReport",
    "CoderAgent",
    # Core
    "MultiAgentSession",
    "SessionResult",
    "Round",
    # Config
    "CrosscheckConfig",
    "load_config",
    # Models
    "Mode",
    "Task",
    "Tier",
    "ModelSpec",
    "REGISTRY",
    # Monitor
    "CrosscheckMonitor",
    "NoOpMonitor",
    "AnomalyEvent",
    # Observer
    "ObserverSession",
    "ObserverResult",
    "ObserverFlag",
    "FolderWatcher",
    # Client
    "OpenRouterClient",
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
crosscheck.agents
|
|
3
|
+
-----------------
|
|
4
|
+
Agent subpackage: supervisor, analyzer, coder.
|
|
5
|
+
"""
|
|
6
|
+
from crosscheck.agents.supervisor import SupervisorAgent, DecomposeResult, SynthesisResult
|
|
7
|
+
from crosscheck.agents.analyzer import AnalyzerPool, AnalyzerReport
|
|
8
|
+
from crosscheck.agents.coder import CoderAgent
|
|
9
|
+
|
|
10
|
+
# Names re-exported by the agents subpackage, one line per source module.
__all__ = [
    "SupervisorAgent", "DecomposeResult", "SynthesisResult",  # supervisor
    "AnalyzerPool", "AnalyzerReport",                         # analyzer
    "CoderAgent",                                             # coder
]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
crosscheck.agents.analyzer
|
|
3
|
+
---------------------------
|
|
4
|
+
Analyzer agent pool — runs 3-4 models in parallel, each probing a different
|
|
5
|
+
angle of the content. Results are structured AnalyzerReport objects.
|
|
6
|
+
|
|
7
|
+
Each analyzer gets a specific task from the Supervisor's decomposition.
|
|
8
|
+
If a model errors, it returns an ERROR report rather than crashing the session.
|
|
9
|
+
Prior round feedback is passed in so analyzers can see if their issues were fixed.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import json
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from crosscheck.client import OpenRouterClient
|
|
20
|
+
from crosscheck.prompts import ANALYZER_SYSTEM, analyzer_prompt
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class AnalyzerReport:
    """Structured outcome of one analyzer model's pass over the content.

    The first seven fields are required, in declaration order; ``raw`` and
    ``error`` are optional and default to an empty dict and ``None``.
    """

    # Identifier of the model that produced this report.
    model_id: str
    # The angle (review perspective) this analyzer was asked to cover.
    angle: str
    score: float
    # One of "PASS" | "ISSUES_FOUND" | "ERROR".
    verdict: str
    # Each entry is a dict shaped like {severity, location, description, fix}.
    issues: list[dict]
    positive_findings: list[str]
    summary: str
    # Parsed payload the report was built from (empty when not supplied).
    raw: dict = field(default_factory=dict)
    # Error text when the analyzer failed; None otherwise.
    error: Optional[str] = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AnalyzerPool:
    """Runs N analyzer models in parallel, one task per model."""

    def __init__(
        self,
        client: OpenRouterClient,
        models: list[str],
    ):
        """Bind the pool to a chat client and a list of analyzer model IDs.

        Raises:
            ValueError: If ``models`` is empty.
        """
        if not models:
            raise ValueError("AnalyzerPool requires at least one model ID.")
        self.client = client
        self.models = models

    async def analyze(
        self,
        content: str,
        tasks: list[dict],  # from Supervisor.decompose()
        round_num: int,
        prior_reports: Optional[list[AnalyzerReport]] = None,
    ) -> list[AnalyzerReport]:
        """
        Run all analyzers in parallel and return one AnalyzerReport per model.

        Models are paired to tasks by index, wrapping around when there are
        more models than tasks. With no tasks at all, every model receives a
        generic "General" task. A model that fails (or whose call is
        cancelled) yields a report with ``verdict="ERROR"`` instead of
        aborting the other analyzers.
        """
        fallback_task = {"angle": "General", "instruction": "Analyze thoroughly."}
        pairs: list[tuple[str, dict, str]] = []
        for i, model in enumerate(self.models):
            task = tasks[i % len(tasks)] if tasks else fallback_task
            pairs.append((model, task, self._prior_feedback(model, prior_reports)))

        results = await asyncio.gather(
            *[
                self._single(model, task, content, round_num, prior)
                for model, task, prior in pairs
            ],
            return_exceptions=True,
        )

        reports: list[AnalyzerReport] = []
        for i, ((model_id, task, _prior), result) in enumerate(zip(pairs, results)):
            # BaseException, not Exception: asyncio.CancelledError derives from
            # BaseException, and gather(return_exceptions=True) hands it back
            # as a result. It must not leak into the report list.
            if isinstance(result, BaseException):
                reports.append(AnalyzerReport(
                    model_id=model_id,
                    angle=task.get("angle", f"Angle {i + 1}"),
                    score=0.0,
                    verdict="ERROR",
                    issues=[],
                    positive_findings=[],
                    summary="Analyzer failed to respond.",
                    error=str(result),
                ))
            else:
                reports.append(result)

        return reports

    @staticmethod
    def _prior_feedback(model: str, prior_reports) -> str:
        """Summarize ``model``'s previous-round report, or return "" if none applies."""
        if not prior_reports:
            return ""
        prev = next((r for r in prior_reports if r.model_id == model), None)
        if prev is None or not prev.issues:
            return ""
        # Only the first five issues are forwarded.
        return (
            f"Prior round score: {prev.score}/10. "
            f"Prior issues: {json.dumps(prev.issues[:5])}"
        )

    async def _single(
        self,
        model: str,
        task: dict,
        content: str,
        round_num: int,
        prior: str,
    ) -> AnalyzerReport:
        """Query one model for one task and convert its JSON reply to a report.

        Any exception raised here (client failure, unparseable reply,
        non-numeric score) propagates to ``analyze``, which turns it into an
        ERROR report.
        """
        angle = task.get("angle", "General")
        instruction = task.get("instruction", "Analyze the content thoroughly.")

        prompt = analyzer_prompt(
            angle=angle,
            instruction=instruction,
            content=content,
            round_num=round_num,
            prior_feedback=prior,
        )

        raw = await self.client.chat(
            model=model,
            messages=[
                {"role": "system", "content": ANALYZER_SYSTEM},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            json_mode=True,
        )

        parsed = self._parse(raw, model)
        return AnalyzerReport(
            model_id=model,
            angle=parsed.get("angle", angle),
            score=float(parsed.get("score", 0)),
            verdict=parsed.get("verdict", "ISSUES_FOUND"),
            issues=parsed.get("issues", []),
            positive_findings=parsed.get("positive_findings", []),
            summary=parsed.get("summary", ""),
            raw=parsed,
        )

    @staticmethod
    def _parse(raw: str, model: str) -> dict:
        """Decode a model reply into a JSON object (dict).

        Tries, in order: the reply as-is; the reply with a surrounding
        Markdown code fence removed; the span from the first ``{`` to the
        last ``}`` (covers replies that wrap the object in prose or add
        trailing text after the fence).

        Raises:
            ValueError: If no attempt decodes, or the decoded JSON is not an
                object. The message names the model and quotes the first 300
                characters of the reply.
        """
        fence_stripped = (
            raw.strip()
            .removeprefix("```json")
            .removeprefix("```")
            .removesuffix("```")
            .strip()
        )
        candidates = [raw, fence_stripped]
        start, end = raw.find("{"), raw.rfind("}")
        if 0 <= start < end:
            candidates.append(raw[start:end + 1])

        failure: Optional[Exception] = None
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as exc:
                failure = exc
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object: callers use .get(), so reject it
            # here with a clear message instead of failing later.
            failure = TypeError(
                f"expected a JSON object, got {type(parsed).__name__}"
            )
            break

        raise ValueError(
            f"Analyzer {model}: JSON parse failed — {failure}\n"
            f"Raw (first 300 chars): {raw[:300]}"
        ) from failure
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
crosscheck.agents.coder
|
|
3
|
+
------------------------
|
|
4
|
+
Coder agent — the final implementer in the pipeline.
|
|
5
|
+
|
|
6
|
+
DESIGN: Any OpenRouter model can be Coder. The user chooses via
|
|
7
|
+
--interactive, --coder flag, or named profile. Anthropic is the
|
|
8
|
+
default in presets but is NOT enforced. DeepSeek, Grok, Kimi, etc.
|
|
9
|
+
are all valid choices.
|
|
10
|
+
|
|
11
|
+
The coder receives the original content, supervisor instructions,
|
|
12
|
+
and all critical/major issues, then returns the complete revised content.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from crosscheck.client import OpenRouterClient
|
|
18
|
+
from crosscheck.models import Task
|
|
19
|
+
from crosscheck.prompts import CODER_SYSTEM, coder_prompt
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CoderAgent:
    """Wraps any single model as the Coder. No model-ID restrictions."""

    def __init__(
        self,
        client: OpenRouterClient,
        model: str,
        task: Task,
    ):
        """Bind the coder to a chat client, a model ID and a task.

        Raises:
            ValueError: If ``model`` is empty.
        """
        if not model:
            raise ValueError("CoderAgent requires a non-empty model ID.")
        self.client = client
        self.model = model
        self.task = task

    async def revise(
        self,
        content: str,
        instructions: str,
        issues: list[dict],
        round_num: int,
        max_tokens: int = 8192,
    ) -> str:
        """
        Produce a complete revised version of `content` resolving all `issues`.
        Returns raw revised content — no JSON, no preamble.

        The model reply is returned with surrounding whitespace stripped.
        """
        prompt = coder_prompt(
            task=self.task,
            content=content,
            instructions=instructions,
            issues=self._format_issues(issues),
            round_num=round_num,
        )

        revised = await self.client.chat(
            model=self.model,
            messages=[
                {"role": "system", "content": CODER_SYSTEM},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_tokens,
            temperature=0.2,
            json_mode=False,  # raw content output, never JSON
        )

        return revised.strip()

    @staticmethod
    def _format_issues(issues: list[dict]) -> str:
        """Render issues as a numbered plain-text list for the coder prompt.

        Each issue becomes ``"<n>. [SEVERITY] [location] description"``,
        followed by a ``→ Fix:`` line when a fix is given. An empty list
        yields a generic "improve overall quality" instruction.

        Issue dicts originate from model-generated JSON, so this is tolerant:
        a missing or null severity defaults to "major", other null fields are
        treated as empty, and a non-dict entry (e.g. a bare string) is used as
        the description.
        """
        if not issues:
            return "No specific issues flagged — improve overall quality and clarity."

        lines = []
        for i, issue in enumerate(issues, 1):
            if not isinstance(issue, dict):
                issue = {"description": str(issue)}
            # `or` rather than a .get() default: a key present with a null
            # value must fall back too, not crash on None.upper().
            sev = str(issue.get("severity") or "major").upper()
            loc = issue.get("location") or ""
            desc = issue.get("description") or ""
            fix = issue.get("fix") or ""
            loc_str = f" [{loc}]" if loc else ""
            lines.append(f"{i}. [{sev}]{loc_str} {desc}")
            if fix:
                lines.append(f" → Fix: {fix}")
        return "\n".join(lines)
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""
|
|
2
|
+
crosscheck.agents.supervisor
|
|
3
|
+
-----------------------------
|
|
4
|
+
Supervisor agent — orchestrates the session, decomposes tasks, synthesizes
|
|
5
|
+
analyzer reports, and makes APPROVED / REVISE decisions.
|
|
6
|
+
|
|
7
|
+
DUAL SUPERVISOR VOTING (when 2 models configured):
|
|
8
|
+
Both run independently on every synthesis call. Rules:
|
|
9
|
+
- Both APPROVED → APPROVED (unanimous)
|
|
10
|
+
- Both REVISE → REVISE (unanimous)
|
|
11
|
+
- Disagreement + both scores ≥ 8.5 → APPROVED (near-perfect exception)
|
|
12
|
+
- Disagreement + any score < 8.5 → REVISE (conservative, safe default)
|
|
13
|
+
Critical issues and coder instructions are merged from both.
|
|
14
|
+
consensus=False is recorded in the result for visibility.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import json
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from typing import Optional
|
|
23
|
+
|
|
24
|
+
from crosscheck.client import OpenRouterClient
|
|
25
|
+
from crosscheck.models import Task
|
|
26
|
+
from crosscheck.prompts import (
|
|
27
|
+
SUPERVISOR_SYSTEM,
|
|
28
|
+
supervisor_decompose,
|
|
29
|
+
supervisor_synthesize,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class DecomposeResult:
    """Supervisor's breakdown of the content into per-analyzer tasks."""

    session_goal: str
    # Each entry is a dict shaped like {id, angle, instruction}.
    analyzer_tasks: list[dict]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class SynthesisResult:
    """Supervisor verdict on one round of analyzer reports.

    The first six fields are required, in declaration order; the rest
    default to an empty dict, an empty list and ``True`` respectively.
    """

    # Either "APPROVED" or "REVISE".
    verdict: str
    overall_score: float
    summary: str
    critical_issues: list[str]
    coder_instructions: str
    approved_aspects: list[str]
    # Parsed supervisor payload(s) this result was built from.
    raw: dict = field(default_factory=dict)
    # Individual supervisor verdicts.
    supervisor_votes: list[str] = field(default_factory=list)
    # False when two supervisors disagreed on the verdict.
    consensus: bool = True
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SupervisorAgent:
    """
    Wraps 1 or 2 supervisor models.
    With 2 models, both synthesize in parallel and votes are reconciled.
    Any OpenRouter model ID is accepted — no restrictions.
    """

    def __init__(
        self,
        client: OpenRouterClient,
        models: list[str],
        task: Task,
        max_rounds: int,
    ):
        """Bind the supervisor to a client, its model IDs, a task and a round cap.

        Only the first two entries of ``models`` are kept.

        Raises:
            ValueError: If ``models`` is empty.
        """
        if not models:
            raise ValueError("SupervisorAgent requires at least one model ID.")
        self.client = client
        self.models = models[:2]  # hard cap at 2
        self.task = task
        self.max_rounds = max_rounds

    # ── Public API ───────────────────────────────────────────────────────

    async def decompose(self, content: str, round_num: int) -> DecomposeResult:
        """Primary supervisor decomposes the content into per-analyzer tasks."""
        prompt = supervisor_decompose(self.task, content, round_num, self.max_rounds)
        raw = await self._call(self.models[0], prompt)
        parsed = self._parse_json(raw, "decompose")
        return DecomposeResult(
            session_goal=parsed.get("session_goal", ""),
            analyzer_tasks=parsed.get("analyzer_tasks", []),
        )

    async def synthesize(
        self,
        analyzer_reports: list[dict],
        round_num: int,
    ) -> SynthesisResult:
        """
        Synthesize analyzer reports into a final verdict.
        Single supervisor: straightforward.
        Dual supervisor: both run in parallel, votes reconciled.
        """
        reports_text = json.dumps(analyzer_reports, indent=2)
        prompt = supervisor_synthesize(reports_text, round_num)

        if len(self.models) == 1:
            return await self._single_synthesis(self.models[0], prompt)

        # Dual: parallel execution. A failure in either call propagates.
        r1, r2 = await asyncio.gather(
            self._single_synthesis(self.models[0], prompt),
            self._single_synthesis(self.models[1], prompt),
        )
        return self._reconcile(r1, r2)

    # ── Private ───────────────────────────────────────────────────────────

    async def _single_synthesis(self, model: str, prompt: str) -> SynthesisResult:
        """Ask one supervisor model for a verdict and wrap its JSON reply.

        Missing fields fall back to conservative defaults (verdict "REVISE",
        score 0, empty text/lists).
        """
        raw = await self._call(model, prompt)
        # The context label uses only the last path segment of the model ID.
        parsed = self._parse_json(raw, f"synthesize[{model.split('/')[-1]}]")
        verdict = parsed.get("verdict", "REVISE")
        return SynthesisResult(
            verdict=verdict,
            overall_score=float(parsed.get("overall_score", 0)),
            summary=parsed.get("summary", ""),
            critical_issues=parsed.get("critical_issues", []),
            coder_instructions=parsed.get("coder_instructions", ""),
            approved_aspects=parsed.get("approved_aspects", []),
            raw=parsed,
            supervisor_votes=[verdict],
            consensus=True,
        )

    @staticmethod
    def _merge_unique(*groups) -> list:
        """Concatenate ``groups`` in order, keeping the first copy of each item.

        Uses equality rather than hashing so that unhashable entries (e.g.
        dicts a model returned where strings were expected) do not raise;
        a ``None`` group is treated as empty.
        """
        merged: list = []
        for group in groups:
            for item in group or []:
                if item not in merged:
                    merged.append(item)
        return merged

    @staticmethod
    def _reconcile(r1: SynthesisResult, r2: SynthesisResult) -> SynthesisResult:
        """Reconcile two supervisor verdicts into one result.

        Unanimous verdicts pass through. On a split vote the result is
        "APPROVED" only if both scores are at least 8.5; otherwise "REVISE"
        wins. The merged score is the mean of both scores rounded to one
        decimal; summary comes from the higher-scoring result; critical
        issues and approved aspects are merged without duplicates.
        """
        votes = [r1.verdict, r2.verdict]
        consensus = r1.verdict == r2.verdict

        if consensus:
            # Unanimous — easy
            final_verdict = r1.verdict
        elif r1.overall_score >= 8.5 and r2.overall_score >= 8.5:
            # Both nearly perfect — allow APPROVED despite split
            final_verdict = "APPROVED"
        else:
            # Safe default: if in doubt, revise
            final_verdict = "REVISE"

        # Use the higher-scoring result as primary for narrative fields
        # (r1 wins ties).
        if r1.overall_score >= r2.overall_score:
            primary, secondary = r1, r2
        else:
            primary, secondary = r2, r1

        # Merge coder instructions (append secondary only when relevant).
        # NOTE: the "[Supervisor 2 additions]" label marks the lower-scoring
        # result, which is not necessarily the second configured model.
        merged_instructions = primary.coder_instructions
        if final_verdict == "REVISE" and secondary.coder_instructions:
            merged_instructions = (
                f"{primary.coder_instructions}\n\n"
                f"[Supervisor 2 additions]\n{secondary.coder_instructions}"
            ).strip()

        return SynthesisResult(
            verdict=final_verdict,
            overall_score=round((r1.overall_score + r2.overall_score) / 2, 1),
            summary=primary.summary,
            critical_issues=SupervisorAgent._merge_unique(
                primary.critical_issues, secondary.critical_issues
            ),
            coder_instructions=merged_instructions,
            approved_aspects=SupervisorAgent._merge_unique(
                primary.approved_aspects, secondary.approved_aspects
            ),
            raw={"supervisor_1": r1.raw, "supervisor_2": r2.raw},
            supervisor_votes=votes,
            consensus=consensus,
        )

    async def _call(self, model: str, user_prompt: str) -> str:
        """Send ``user_prompt`` to ``model`` under the supervisor system prompt."""
        return await self.client.chat(
            model=model,
            messages=[
                {"role": "system", "content": SUPERVISOR_SYSTEM},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            json_mode=True,
        )

    @staticmethod
    def _parse_json(raw: str, context: str) -> dict:
        """Decode a supervisor reply into a JSON object (dict).

        Tries, in order: the reply as-is; the reply with a surrounding
        Markdown code fence removed; the span from the first ``{`` to the
        last ``}`` (covers replies that wrap the object in prose or add
        trailing text after the fence).

        Raises:
            ValueError: If no attempt decodes, or the decoded JSON is not an
                object. The message includes ``context`` and the first 400
                characters of the reply.
        """
        fence_stripped = (
            raw.strip()
            .removeprefix("```json")
            .removeprefix("```")
            .removesuffix("```")
            .strip()
        )
        candidates = [raw, fence_stripped]
        start, end = raw.find("{"), raw.rfind("}")
        if 0 <= start < end:
            candidates.append(raw[start:end + 1])

        failure: Optional[Exception] = None
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as exc:
                failure = exc
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object: callers use .get(), so reject it
            # here with a clear message instead of failing later.
            failure = TypeError(
                f"expected a JSON object, got {type(parsed).__name__}"
            )
            break

        raise ValueError(
            f"Supervisor {context}: JSON parse failed — {failure}\n"
            f"Raw (first 400 chars): {raw[:400]}"
        ) from failure
|