second-opinion-mcp 0.3.0__py3-none-any.whl
- second_opinion_mcp/__init__.py +16 -0
- second_opinion_mcp/__main__.py +18 -0
- second_opinion_mcp/cli.py +272 -0
- second_opinion_mcp/server.py +1537 -0
- second_opinion_mcp-0.3.0.dist-info/METADATA +324 -0
- second_opinion_mcp-0.3.0.dist-info/RECORD +9 -0
- second_opinion_mcp-0.3.0.dist-info/WHEEL +4 -0
- second_opinion_mcp-0.3.0.dist-info/entry_points.txt +2 -0
- second_opinion_mcp-0.3.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1537 @@
#!/usr/bin/env python3
"""
Second Opinion MCP Server - Multi-model analysis with DeepSeek, Moonshot (Kimi), and OpenRouter.
Provides second opinions, consensus debates, critical challenges, and code reviews.
Secrets via keyring (no env var exposure).
"""
import asyncio
import atexit
import os
import re
import time
from asyncio import Semaphore
from datetime import datetime
from pathlib import Path
from typing import Literal

import httpx
import keyring
from openai import AsyncOpenAI, APIError, RateLimitError, APITimeoutError, APIConnectionError
from mcp.server.fastmcp import FastMCP
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import logging

# Library logging pattern: NullHandler by default, host app configures
logger = logging.getLogger("second-opinion")
logger.addHandler(logging.NullHandler())

# Logging policy:
# - ERROR: Failures affecting operation (API errors, file access failures)
# - WARNING: Recoverable issues (rate limits, truncation, deprecated usage)
# - INFO: Significant operations (client creation, file saves, workflow steps)
# - DEBUG: Detailed diagnostics (cache stats, request details, content sizes)


def _check_keyring_security() -> None:
    """Warn or fail if using insecure keyring backend.

    Detects PlaintextKeyring and NullKeyring which store credentials insecurely.
    Set SECOND_OPINION_STRICT_KEYRING=1 to fail on insecure backends in production.
    """
    backend_name = str(type(keyring.get_keyring()))
    insecure_backends = ('PlaintextKeyring', 'NullKeyring', 'Null')
    if any(name in backend_name for name in insecure_backends):
        logger.warning(
            "SECURITY WARNING: Using insecure keyring backend (%s). "
            "API keys may be stored in plaintext. For production, use "
            "a secure backend or set SECOND_OPINION_STRICT_KEYRING=1 to fail.",
            backend_name
        )
        if os.environ.get("SECOND_OPINION_STRICT_KEYRING"):
            raise RuntimeError(
                f"Refusing to start with insecure keyring backend: {backend_name}. "
                "Configure a secure backend or unset SECOND_OPINION_STRICT_KEYRING."
            )
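
# Example (illustrative; matches the guidance emitted by get_key below): store
# each provider's key once via the keyring CLI so no env vars are needed:
#   keyring set second-opinion deepseek
#   keyring set second-opinion moonshot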


# Path security: allowed roots and blocked system paths
ALLOWED_ROOTS: list[Path] = []


def _init_allowed_roots() -> list[Path]:
    """Initialize allowed project roots from environment or defaults."""
    env_roots = os.environ.get("SECOND_OPINION_ALLOWED_ROOTS", "")
    if env_roots:
        return [Path(p).resolve() for p in env_roots.split(os.pathsep) if p]
    # Default safe directories (common project locations)
    home = Path.home()
    defaults = [
        home / d for d in [
            "Projects", "repos", "code", "src", "work", "dev",
            "Documents", "OneDrive", "Desktop"
        ]
    ]
    # On Windows, also check for OneDrive on other drives (business OneDrive)
    if os.name == "nt":
        import glob
        for drive in "CDEFGH":
            # Match "OneDrive - *" business folders
            for onedrive in glob.glob(f"{drive}:\\OneDrive*"):
                defaults.append(Path(onedrive))
    return [p for p in defaults if p.exists()]
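
# Example (illustrative paths): roots are separated with os.pathsep
# (":" on POSIX, ";" on Windows):
#   export SECOND_OPINION_ALLOWED_ROOTS="/home/user/Projects:/home/user/work"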


def _configure_logging() -> None:
    """Configure logging from environment variables.

    SECOND_OPINION_LOG_LEVEL: Set log level (DEBUG, INFO, WARNING, ERROR)
    SECOND_OPINION_LOG_ENABLED: If set, enable logging to stderr with timestamps
    """
    level_name = os.environ.get("SECOND_OPINION_LOG_LEVEL", "WARNING").upper()
    level = getattr(logging, level_name, logging.WARNING)

    if os.environ.get("SECOND_OPINION_LOG_ENABLED"):
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        ))
        logger.addHandler(handler)
    logger.setLevel(level)
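
# Example (illustrative): turn on timestamped stderr diagnostics for a session:
#   export SECOND_OPINION_LOG_ENABLED=1
#   export SECOND_OPINION_LOG_LEVEL=DEBUG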


def _validate_configuration() -> None:
    """Validate environment configuration at startup.

    Called after MODELS is defined to validate:
    - SECOND_OPINION_PROVIDERS contains only known providers
    - SECOND_OPINION_ALLOWED_ROOTS paths exist (warning only)
    - Rate limit values are within valid ranges
    """
    errors = []

    # Validate SECOND_OPINION_PROVIDERS
    env_providers = os.environ.get("SECOND_OPINION_PROVIDERS", "")
    if env_providers:
        requested = [p.strip().lower() for p in env_providers.split(",") if p.strip()]
        requested = ["moonshot" if p == "kimi" else p for p in requested]
        unknown = [p for p in requested if p not in MODELS]
        if unknown:
            errors.append(f"Unknown providers in SECOND_OPINION_PROVIDERS: {unknown}")

    # Validate SECOND_OPINION_ALLOWED_ROOTS (warning only, paths may not exist yet)
    env_roots = os.environ.get("SECOND_OPINION_ALLOWED_ROOTS", "")
    if env_roots:
        for p in env_roots.split(os.pathsep):
            if p and not Path(p).exists():
                logger.warning("Allowed root does not exist: %s", p)

    # Validate rate limit values
    try:
        concurrent = int(os.environ.get("SECOND_OPINION_MOONSHOT_CONCURRENT", "3"))
        if concurrent < 1 or concurrent > 10:
            errors.append(f"SECOND_OPINION_MOONSHOT_CONCURRENT must be 1-10, got {concurrent}")
    except ValueError as e:
        errors.append(f"Invalid SECOND_OPINION_MOONSHOT_CONCURRENT: {e}")

    try:
        rpm = int(os.environ.get("SECOND_OPINION_MOONSHOT_RPM", "20"))
        if rpm < 1 or rpm > 100:
            errors.append(f"SECOND_OPINION_MOONSHOT_RPM must be 1-100, got {rpm}")
    except ValueError as e:
        errors.append(f"Invalid SECOND_OPINION_MOONSHOT_RPM: {e}")

    if errors:
        raise RuntimeError("Configuration errors: " + "; ".join(errors))


def _is_safe_directory(path: Path) -> bool:
    """Check if path is under an allowed root, not in blocked system paths."""
    # Defense-in-depth: reject paths with traversal patterns
    path_str = str(path)
    if '..' in path_str:
        logger.warning("Path traversal pattern detected: %s", path_str)
        return False

    resolved = path.resolve()

    # Block known system directories
    blocked = [
        Path("/etc"), Path("/var"), Path("/root"), Path("/proc"), Path("/sys"),
        Path("/boot"), Path("/lib"), Path("/lib64"), Path("/usr/lib"),
        Path("C:/Windows"), Path("C:/Program Files"), Path("C:/Program Files (x86)"),
        Path("C:/ProgramData"), Path("C:/System Volume Information"),
    ]
    for b in blocked:
        try:
            resolved.relative_to(b.resolve())
            return False
        except ValueError:
            continue

    # Must be under an allowed root
    for allowed in ALLOWED_ROOTS:
        try:
            resolved.relative_to(allowed)
            return True
        except ValueError:
            continue
    return False
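
# Illustrative behavior (assuming ~/Projects is one of the allowed roots):
#   _is_safe_directory(Path.home() / "Projects" / "app")  -> True
#   _is_safe_directory(Path("/etc"))                      -> False (blocked system dir)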


mcp = FastMCP("second-opinion")

# Initialize security and configuration at module load
_check_keyring_security()
ALLOWED_ROOTS.extend(_init_allowed_roots())


# Input validation constants
MAX_PATH_LENGTH = 4096
MAX_PROMPT_LENGTH = 500_000
MAX_FOCUS_AREAS = 20

# Streaming memory limits
MAX_STREAMING_CONTENT_BYTES = 10 * 1024 * 1024  # 10MB
MAX_STREAMING_REASONING_BYTES = 5 * 1024 * 1024  # 5MB

# Rate Limiting Strategy
#
# DeepSeek: NO client-side rate limiting needed
# - DeepSeek API has no rate limits (can handle 1 trillion tokens/day)
# - No concurrency limits imposed by the API
# - During high traffic, requests queue server-side with 10-minute timeout
# - Source: DeepSeek API Reference - "DeepSeek does not constrain rate limits"
#
# Moonshot (Kimi): Client-side rate limiting REQUIRED
# - Tier 0 limits: 3 concurrent requests, 20 RPM, 500K TPM, 1.5M TPD
# - Rate limits calculated using max_tokens parameter, not actual output
# - Source: Moonshot API Reference - Rate Limits section
#
# Configurable via environment variables for different API tiers
MOONSHOT_MAX_CONCURRENT = int(os.environ.get("SECOND_OPINION_MOONSHOT_CONCURRENT", "3"))
MOONSHOT_RPM_LIMIT = int(os.environ.get("SECOND_OPINION_MOONSHOT_RPM", "20"))
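
# Example (illustrative values for a higher Moonshot tier; they must stay within
# the 1-10 / 1-100 ranges enforced by _validate_configuration):
#   export SECOND_OPINION_MOONSHOT_CONCURRENT=5
#   export SECOND_OPINION_MOONSHOT_RPM=60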

# HTTP connection pool configuration
HTTP_MAX_CONNECTIONS = 10
HTTP_MAX_KEEPALIVE = 5
HTTP_CONNECT_TIMEOUT = 10.0
HTTP_READ_TIMEOUT = 300.0

_moonshot_semaphore: Semaphore | None = None
_moonshot_request_times: list[float] = []


def _get_moonshot_semaphore() -> Semaphore:
    """Get or create the Moonshot concurrency semaphore."""
    global _moonshot_semaphore
    if _moonshot_semaphore is None:
        _moonshot_semaphore = Semaphore(MOONSHOT_MAX_CONCURRENT)
    return _moonshot_semaphore


async def _check_moonshot_rpm() -> None:
    """Check and enforce Moonshot RPM limit, waiting if necessary."""
    global _moonshot_request_times
    now = time.monotonic()
    # Keep only requests from the last 60 seconds
    _moonshot_request_times = [t for t in _moonshot_request_times if now - t < 60]

    if len(_moonshot_request_times) >= MOONSHOT_RPM_LIMIT:
        wait_time = 60 - (now - _moonshot_request_times[0]) + 0.1
        if wait_time > 0:
            logger.info("Moonshot RPM limit reached, waiting %.1f seconds", wait_time)
            await asyncio.sleep(wait_time)
        await _check_moonshot_rpm()  # Recheck after waiting; the recursive call records the request
        return  # Don't fall through, or this request would be counted twice

    _moonshot_request_times.append(time.monotonic())


def _validate_string_input(value: str, max_length: int, name: str) -> tuple[str | None, str]:
    """Validate user-provided string. Returns (error_message, sanitized_value)."""
    if not isinstance(value, str):
        return f"{name} must be a string", ""
    if '\x00' in value:
        logger.warning("Null byte detected in %s input", name)
        return f"{name} contains invalid null byte", ""
    if len(value) > max_length:
        return f"{name} exceeds maximum length of {max_length}", ""
    return None, value


def _validate_path_input(path: str) -> tuple[str | None, str]:
    """Validate user-provided path string."""
    return _validate_string_input(path, MAX_PATH_LENGTH, "Path")


def _validate_prompt_input(prompt: str) -> tuple[str | None, str]:
    """Validate user-provided prompt string."""
    return _validate_string_input(prompt, MAX_PROMPT_LENGTH, "Prompt")


# Model configurations with full provider support
MODELS: dict[str, dict] = {
    "deepseek": {
        "enabled": True,
        "keyring_service": "second-opinion",
        "keyring_name": "deepseek",
        "base_url": "https://api.deepseek.com",
        "model": "deepseek-reasoner",
        "available_models": ["deepseek-reasoner", "deepseek-chat"],
        "max_tokens": 32768,  # Match reasoner default (max 64K)
        "context_window": 128000,
        "headers": {},
        "requires_streaming": False,
        "supports_streaming": True,  # Optional streaming available
        "temperature": 1.0,  # Default (0.0 for coding, 1.3 for conversation)
        "supports_thinking": True,
        "supports_json_mode": True,  # DeepSeek V3.2 supports JSON output
    },
    "moonshot": {
        "enabled": True,
        "keyring_service": "second-opinion",
        "keyring_name": "moonshot",
        "base_url": "https://api.moonshot.ai/v1",
        "model": "kimi-k2.5",
        "available_models": [
            "kimi-k2.5",  # Default, thinking enabled
            "kimi-k2-turbo-preview",  # Fast production (60-100 tok/s)
            "kimi-k2-0905-preview",  # Agentic coding optimized
            "kimi-k2-thinking",  # Deep reasoning
            "kimi-k2-thinking-turbo",  # Fast reasoning
        ],
        "max_tokens": 32768,
        "context_window": 262144,  # 256K tokens
        "headers": {},
        "requires_streaming": True,  # Thinking models need streaming
        "temperature": 1.0,  # Required for kimi-k2.5 thinking mode (0.6 when thinking disabled)
        "supports_thinking": True,
        "supports_json_mode": True,
    },
    "openrouter": {
        "enabled": False,  # Disabled by default
        "keyring_service": "second-opinion",
        "keyring_name": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "model": "moonshotai/kimi-k2",
        "available_models": ["moonshotai/kimi-k2", "deepseek/deepseek-r1"],
        "max_tokens": 16384,
        "context_window": 128000,
        "headers": {},
        "requires_streaming": False,
        "temperature": None,
        "supports_thinking": False,
        "supports_json_mode": False,
    },
}


def _init_enabled_providers() -> None:
    """Initialize enabled providers from SECOND_OPINION_PROVIDERS env var."""
    env_providers = os.environ.get("SECOND_OPINION_PROVIDERS", "")
    if env_providers:
        requested = [p.strip().lower() for p in env_providers.split(",") if p.strip()]
        # Map kimi alias to moonshot
        requested = ["moonshot" if p == "kimi" else p for p in requested]
        for provider in MODELS:
            MODELS[provider]["enabled"] = provider in requested
        logger.info("Providers configured from env: %s", requested)


# Initialize providers and validate configuration at module load
_init_enabled_providers()
_configure_logging()
_validate_configuration()
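
# Example (illustrative): enable DeepSeek plus Moonshot via its legacy alias;
# "kimi" is mapped to "moonshot" before matching:
#   export SECOND_OPINION_PROVIDERS="deepseek,kimi"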


# Provider type includes backwards-compatible kimi alias
Provider = Literal["deepseek", "moonshot", "openrouter", "kimi"]


def _validate_provider(provider: str) -> tuple[str, str | None]:
    """Validate and normalize provider. Returns (normalized_provider, error_or_None)."""
    # Handle kimi -> moonshot alias
    if provider == "kimi":
        logger.warning("Provider 'kimi' is deprecated, using 'moonshot' instead")
        provider = "moonshot"

    if provider not in MODELS:
        available = [p for p in MODELS if MODELS[p].get("enabled")]
        return provider, f"Error: Unknown provider '{provider}'. Available: {', '.join(available)}"

    if not MODELS[provider].get("enabled"):
        available = [p for p in MODELS if MODELS[p].get("enabled")]
        return provider, f"Error: Provider '{provider}' is disabled. Enabled: {', '.join(available)}"

    return provider, None


def _validate_model(provider: str, model: str) -> str | None:
    """Validate model for provider. Returns error message if invalid, None if OK."""
    if not model:
        return None  # Use default
    config = MODELS.get(provider)
    if not config:
        return f"Error: Unknown provider '{provider}'"
    available = config.get("available_models", [])
    if model not in available:
        return f"Error: Model '{model}' not available for {provider}. Available: {', '.join(available)}"
    return None


def _validate_api_key(provider: str, key: str) -> tuple[bool, str]:
    """Validate API key format for provider.

    Returns:
        (valid, error_message) tuple. If valid is True, error_message is empty.
    """
    if not key or not key.strip():
        return False, f"Empty API key for {provider}"

    # Provider-specific format validators
    validators = {
        "deepseek": lambda k: k.startswith("sk-") and len(k) >= 20,
        "moonshot": lambda k: bool(re.match(r'^[a-zA-Z0-9_-]{32,}$', k)),
        "openrouter": lambda k: k.startswith("sk-or-") and len(k) >= 20,
    }

    validator = validators.get(provider)
    if validator and not validator(key):
        return False, f"Invalid {provider} API key format"
    return True, ""


def get_key(provider: str) -> str:
    """Retrieve and validate API key from OS keyring for the specified provider."""
    config = MODELS[provider]
    try:
        key = keyring.get_password(config["keyring_service"], config["keyring_name"])
    except Exception as e:
        logger.error("Keyring access failed for %s: %s", provider, e)
        raise RuntimeError(f"Keyring access failed for {provider}. Check keyring configuration.")
    if not key:
        raise RuntimeError(
            f"No API key configured for {provider}. "
            f"Run: keyring set {config['keyring_service']} {config['keyring_name']}"
        )

    valid, error = _validate_api_key(provider, key)
    if not valid:
        raise RuntimeError(error)

    logger.debug("Retrieved API key for %s", provider)
    return key


# Client cache for connection reuse
_clients: dict[str, AsyncOpenAI] = {}


def get_client(provider: str) -> AsyncOpenAI:
    """Get or create a cached AsyncOpenAI client for the specified provider."""
    if provider not in _clients:
        config = MODELS[provider]

        # Configure connection pool limits
        transport = httpx.AsyncHTTPTransport(
            limits=httpx.Limits(
                max_connections=HTTP_MAX_CONNECTIONS,
                max_keepalive_connections=HTTP_MAX_KEEPALIVE,
            ),
        )

        # Configure timeouts
        timeout = httpx.Timeout(
            connect=HTTP_CONNECT_TIMEOUT,
            read=HTTP_READ_TIMEOUT,
            write=30.0,
            pool=30.0,
        )

        # Build http client with pool and timeout config
        http_client_kwargs: dict = {
            "transport": transport,
            "timeout": timeout,
        }

        if config["headers"]:
            # Add event hooks to inject headers on every request
            async def inject_headers(request: httpx.Request) -> None:
                for key, value in config["headers"].items():
                    request.headers[key] = value
            http_client_kwargs["event_hooks"] = {"request": [inject_headers]}

        http_client = httpx.AsyncClient(**http_client_kwargs)

        _clients[provider] = AsyncOpenAI(
            api_key=get_key(provider),
            base_url=config["base_url"],
            http_client=http_client,
        )
        logger.info("Created API client for %s (max_conn=%d)", provider, HTTP_MAX_CONNECTIONS)
    return _clients[provider]
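
# Usage sketch (illustrative): clients are created lazily and cached per
# provider, so repeated calls reuse the pooled connections:
#   client = get_client("deepseek")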


async def cleanup_clients() -> None:
    """Close all cached HTTP clients. Call on shutdown."""
    for provider, client in _clients.items():
        try:
            await client.close()
            logger.info("Closed client for %s", provider)
        except Exception as e:
            logger.warning("Error closing client for %s: %s", provider, e)
    _clients.clear()


# Retry configuration for transient API failures
# Retries: 3 attempts with exponential backoff (2s, 4s, 8s max 30s)
# Only retries on timeout and connection errors (not rate limits or API errors)
_retry_decorator = retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=2, max=30),
    retry=retry_if_exception_type((APITimeoutError, APIConnectionError)),
    reraise=True,
)


async def _call_model_sync(
    provider: str,
    prompt: str,
    system: str = "",
    model: str = "",
    thinking_disabled: bool = False,
    temperature: float | None = None,
) -> tuple[str, str]:
    """
    Synchronous (non-streaming) call to a provider with automatic retry.

    Args:
        provider: Provider name
        prompt: User prompt
        system: Optional system prompt
        model: Optional model override
        thinking_disabled: If True, use fast non-thinking model for DeepSeek
        temperature: Optional temperature override (default uses config)

    Returns:
        (content, reasoning_content) tuple. reasoning_content may be empty.
    """
    config = MODELS[provider]
    client = get_client(provider)
    model_name = model or config["model"]

    # DeepSeek fast mode: switch to deepseek-chat (non-thinking model)
    if provider == "deepseek" and thinking_disabled and model_name == "deepseek-reasoner":
        model_name = "deepseek-chat"
        logger.debug("DeepSeek fast mode: switched to %s", model_name)

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    kwargs: dict = {
        "model": model_name,
        "messages": messages,
        "max_tokens": config["max_tokens"],
    }

    # Temperature priority: explicit param > config > None
    effective_temp = temperature if temperature is not None else config.get("temperature")
    if effective_temp is not None:
        kwargs["temperature"] = effective_temp

    # Inner function with retry decorator for transient failures
    @_retry_decorator
    async def _make_api_call():
        logger.debug("Calling %s (sync) model=%s temp=%s with %d chars prompt",
                     provider, model_name, effective_temp, len(prompt))
        return await client.chat.completions.create(**kwargs)

    try:
        if provider == "moonshot":
            # Non-streaming Moonshot calls (the preview models) must respect the
            # same client-side limits as the streaming path (see Rate Limiting
            # Strategy above).
            await _check_moonshot_rpm()
            async with _get_moonshot_semaphore():
                resp = await _make_api_call()
        else:
            resp = await _make_api_call()
        content = resp.choices[0].message.content or ""
        reasoning = getattr(resp.choices[0].message, "reasoning_content", "") or ""
        logger.debug("Received %d chars response from %s", len(content), provider)

        # Log cache stats for DeepSeek (automatic disk-based caching)
        if hasattr(resp, 'usage') and resp.usage:
            cache_hit = getattr(resp.usage, 'prompt_cache_hit_tokens', 0) or 0
            cache_miss = getattr(resp.usage, 'prompt_cache_miss_tokens', 0) or 0
            if cache_hit or cache_miss:
                logger.debug("Cache stats for %s: hit=%d miss=%d tokens",
                             provider, cache_hit, cache_miss)

        return content, reasoning
    except RateLimitError as e:
        logger.warning("Rate limit hit for %s: %s", provider, e)
        return f"Error: Rate limit exceeded for {provider}. Please wait and retry.", ""
    except APITimeoutError as e:
        # After retries exhausted
        logger.error("Timeout for %s after retries: %s", provider, e)
        return f"Error: Request to {provider} timed out after retries. Try again later.", ""
    except APIConnectionError as e:
        # After retries exhausted
        logger.error("Connection error for %s after retries: %s", provider, e)
        return f"Error: Could not connect to {provider} after retries. Check network.", ""
    except APIError as e:
        logger.error("API error for %s: %s", provider, e)
        return f"Error: API failure from {provider}.", ""


async def _call_model_streaming(
    provider: str,
    prompt: str,
    system: str = "",
    model: str = "",
    thinking_disabled: bool = False,
    temperature: float | None = None,
) -> tuple[str, str]:
    """
    Streaming call for Moonshot thinking models with automatic retry.

    Moonshot's kimi-k2.5 with thinking mode requires streaming to avoid timeouts
    and to access reasoning_content from delta chunks.

    Args:
        provider: Provider name
        prompt: User prompt
        system: Optional system prompt
        model: Optional model override
        thinking_disabled: If True, disable thinking mode for Moonshot
        temperature: Optional temperature override (default uses config)

    Returns:
        (content, reasoning_content) tuple.
    """
    config = MODELS[provider]
    client = get_client(provider)
    model_name = model or config["model"]

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    kwargs: dict = {
        "model": model_name,
        "messages": messages,
        "max_tokens": config["max_tokens"],
        "stream": True,
    }

    # Handle thinking mode toggle for Moonshot
    # IMPORTANT: Moonshot kimi-k2.5 has STRICT temperature requirements that OVERRIDE user preferences:
    # - Thinking ENABLED: temperature MUST be 1.0 (any other value causes 400 error)
    # - Thinking DISABLED: temperature MUST be 0.6 (any other value causes 400 error)
    if provider == "moonshot":
        if thinking_disabled:
            kwargs["extra_body"] = {"thinking": {"type": "disabled"}}
            kwargs["temperature"] = 0.6  # Required when thinking is disabled
        else:
            kwargs["temperature"] = 1.0  # Required when thinking is enabled
    else:
        # Temperature priority for non-Moonshot: explicit param > config > None
        effective_temp = temperature if temperature is not None else config.get("temperature")
        if effective_temp is not None:
            kwargs["temperature"] = effective_temp

    # Inner function with retry for transient connection failures
    @_retry_decorator
    async def _do_streaming_call():
        content_parts = []
        reasoning_parts = []
        content_size = 0
        reasoning_size = 0
        content_truncated = False
        reasoning_truncated = False
        final_usage = None

        async with await client.chat.completions.create(**kwargs) as stream:
            async for chunk in stream:
                if chunk.choices and chunk.choices[0].delta:
                    delta = chunk.choices[0].delta
                    if hasattr(delta, "content") and delta.content:
                        chunk_bytes = len(delta.content.encode("utf-8"))
                        if content_size + chunk_bytes <= MAX_STREAMING_CONTENT_BYTES:
                            content_parts.append(delta.content)
                            content_size += chunk_bytes
                        elif not content_truncated:
                            logger.warning("Content size limit reached (%d bytes)", content_size)
                            content_truncated = True
                    if hasattr(delta, "reasoning_content") and delta.reasoning_content:
                        chunk_bytes = len(delta.reasoning_content.encode("utf-8"))
                        if reasoning_size + chunk_bytes <= MAX_STREAMING_REASONING_BYTES:
                            reasoning_parts.append(delta.reasoning_content)
                            reasoning_size += chunk_bytes
                        elif not reasoning_truncated:
                            logger.warning("Reasoning size limit reached (%d bytes)", reasoning_size)
                            reasoning_truncated = True
                if hasattr(chunk, 'usage') and chunk.usage:
                    final_usage = chunk.usage

        return content_parts, reasoning_parts, content_truncated, reasoning_truncated, final_usage

    try:
        logger.debug("Calling %s (streaming) model=%s temp=%s with %d chars prompt",
                     provider, model_name, kwargs.get("temperature"), len(prompt))

        # Apply Moonshot rate limiting (DeepSeek has no rate limits)
        if provider == "moonshot":
            await _check_moonshot_rpm()
            semaphore = _get_moonshot_semaphore()
        else:
            semaphore = None

        if semaphore:
            async with semaphore:
                content_parts, reasoning_parts, content_truncated, reasoning_truncated, final_usage = await _do_streaming_call()
        else:
            content_parts, reasoning_parts, content_truncated, reasoning_truncated, final_usage = await _do_streaming_call()

        content = "".join(content_parts)
        reasoning = "".join(reasoning_parts)

        if content_truncated:
            content += "\n\n[Response truncated due to size limits]"
        if reasoning_truncated:
            reasoning += "\n\n[Reasoning truncated due to size limits]"

        logger.debug("Received %d chars content, %d chars reasoning from %s",
                     len(content), len(reasoning), provider)

        # Log cache stats for streaming responses
        if final_usage:
            cache_hit = getattr(final_usage, 'prompt_cache_hit_tokens', 0) or 0
            cache_miss = getattr(final_usage, 'prompt_cache_miss_tokens', 0) or 0
            if cache_hit or cache_miss:
                logger.debug("Cache stats for %s: hit=%d miss=%d tokens",
                             provider, cache_hit, cache_miss)

        return content, reasoning
    except RateLimitError as e:
        logger.warning("Rate limit hit for %s: %s", provider, e)
        return f"Error: Rate limit exceeded for {provider}. Please wait and retry.", ""
    except APITimeoutError as e:
        logger.error("Timeout for %s after retries: %s", provider, e)
        return f"Error: Request to {provider} timed out after retries. Try again later.", ""
    except APIConnectionError as e:
        logger.error("Connection error for %s after retries: %s", provider, e)
        return f"Error: Could not connect to {provider} after retries. Check network.", ""
    except APIError as e:
        logger.error("API error for %s: %s", provider, e)
        return f"Error: API failure from {provider}.", ""


async def call_model(
    provider: str,
    prompt: str,
    system: str = "",
    model: str = "",
    include_reasoning: bool = False,
    fast: bool = False,
    temperature: float | None = None,
    use_streaming: bool = False,
) -> str:
    """
    Make a completion call to the specified provider with error handling.

    Args:
        provider: Provider name (deepseek, moonshot, openrouter)
        prompt: User prompt
        system: Optional system prompt
        model: Optional model override
        include_reasoning: If True and provider supports it, include reasoning in output
        fast: If True, disable thinking mode for faster responses
        temperature: Optional temperature override (0.0=coding, 1.0=default, 1.3=creative)
        use_streaming: If True and provider supports it, use streaming for long responses

    Returns:
        Response content, optionally with reasoning prefix
    """
    config = MODELS[provider]
    model_name = model or config["model"]

    # Determine whether to use streaming
    # Moonshot thinking models (kimi-k2.5, kimi-k2-thinking*) require streaming
    # Preview models (kimi-k2-0905-preview, kimi-k2-turbo-preview) don't require streaming
    moonshot_thinking_models = {"kimi-k2.5", "kimi-k2-thinking", "kimi-k2-thinking-turbo"}
    if provider == "moonshot":
        needs_streaming = model_name in moonshot_thinking_models
    else:
        needs_streaming = config.get("requires_streaming") or (
            use_streaming and config.get("supports_streaming")
        )

    if needs_streaming:
        content, reasoning = await _call_model_streaming(
            provider, prompt, system, model,
            thinking_disabled=fast, temperature=temperature
        )
    else:
        content, reasoning = await _call_model_sync(
            provider, prompt, system, model,
            thinking_disabled=fast, temperature=temperature
        )

    # Format output with reasoning if requested and available
    if include_reasoning and reasoning:
        return f"**Reasoning:**\n{reasoning}\n\n**Response:**\n{content}"
    return content
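
# Usage sketch (illustrative prompt and settings):
#   answer = await call_model("deepseek", "Evaluate this schema design",
#                             include_reasoning=True, temperature=0.0)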


@mcp.tool()
async def second_opinion(
    prompt: str,
    context: str = "",
    provider: Provider = "deepseek",
    include_reasoning: bool = False,
    fast: bool = False,
) -> str:
    """
    Get a second opinion from DeepSeek Reasoner, Moonshot (Kimi K2.5), or OpenRouter.

    Args:
        prompt: The question or code to review
        context: Optional background context
        provider: Model to use - "deepseek" (default), "moonshot", or "openrouter"
        include_reasoning: If True, include chain-of-thought reasoning in output
        fast: If True, disable thinking mode for faster responses

    Returns:
        Analysis with optional chain-of-thought reasoning
    """
    if err := _validate_prompt_input(prompt)[0]:
        return f"Error: {err}"
    if context and (err := _validate_prompt_input(context)[0]):
        return f"Error: {err}"
    provider, err = _validate_provider(provider)
    if err:
        return err
    full_prompt = f"{context}\n\n{prompt}" if context else prompt
    # Temperature 1.0: balanced default for general queries
    return await call_model(
        provider, full_prompt,
        include_reasoning=include_reasoning, fast=fast, temperature=1.0
    )


@mcp.tool()
async def challenge(
    proposal: str,
    context: str = "",
    provider: Provider = "moonshot",
    include_reasoning: bool = True,
    fast: bool = False,
) -> str:
    """
    Get a critical analysis of a proposal. Anti-sycophancy mode - the model
    is instructed to find weaknesses, not validate.

    Args:
        proposal: The idea, code, or plan to critique
        context: Optional background context
        provider: Model to use - "moonshot" (default), "deepseek", or "openrouter"
        include_reasoning: If True, include chain-of-thought reasoning (default True for challenge)
        fast: If True, disable thinking mode for faster responses

    Returns:
        Critical analysis highlighting weaknesses and risks
    """
    if err := _validate_prompt_input(proposal)[0]:
        return f"Error: {err}"
    # Validate context the same way second_opinion does
    if context and (err := _validate_prompt_input(context)[0]):
        return f"Error: {err}"
    provider, err = _validate_provider(provider)
    if err:
        return err
    system = """CRITICAL ANALYSIS MODE - You must find weaknesses in this proposal.
Do NOT agree or validate. Your job is to:
1. Identify logical flaws
2. Find edge cases that fail
3. Point out unsafe assumptions
4. Highlight security/performance issues
5. Suggest what the author missed

Be constructive but ruthless. The goal is to improve the proposal by exposing its weaknesses."""

    full_prompt = f"Context:\n{context}\n\nProposal to critique:\n{proposal}" if context else f"Proposal to critique:\n{proposal}"
    # Temperature 1.3: creative critique needs diversity
    return await call_model(
        provider, full_prompt, system,
        include_reasoning=include_reasoning, fast=fast, temperature=1.3
    )


def _discover_reviewable_files(
    dir_path: Path,
    max_size: int = 100_000,
    max_file_size: int = 50_000,
    max_files: int = 100,
    max_dirs: int = 500,
) -> tuple[list[str], list[str], int]:
    """
    Discover files to review in a directory.

    Applies security filtering to skip sensitive files, symlinks, binary files,
    and directories that may contain secrets.

    Args:
        dir_path: Resolved path to the directory to scan
        max_size: Maximum total content size in bytes (default 100KB)
        max_file_size: Maximum individual file size in bytes (default 50KB)
        max_files: Maximum number of files to process (default 100)
        max_dirs: Maximum number of directories to scan (default 500)

    Returns:
        (file_list, files_content, total_size) where:
        - file_list: Human-readable list of files found (with skip reasons)
        - files_content: List of formatted file contents for review
        - total_size: Total bytes of content collected
    """
    # Security: directories that may contain secrets or are not code
    skip_dirs = {
        "__pycache__", ".git", "node_modules", ".venv", "venv",
        ".mypy_cache", ".pytest_cache", "dist", "build", ".egg-info",
        ".ssh", ".gnupg", ".aws", ".docker", ".kube", ".azure",
        ".config", "secrets", "credentials", ".credentials",
    }

    # Security: files that commonly contain secrets
    skip_files = {
        ".env", ".env.local", ".env.development", ".env.production", ".env.test",
        "id_rsa", "id_rsa.pub", "id_ed25519", "id_ed25519.pub", "id_dsa",
        ".npmrc", ".pypirc", ".netrc", ".htpasswd",
        "credentials.json", "service-account.json", "secrets.json",
        "config.json",  # Often contains API keys
        ".dockerconfigjson", "kubeconfig",
    }

    skip_extensions = {
        ".pyc", ".pyo", ".exe", ".dll", ".so", ".dylib", ".bin", ".lock",
        ".pem", ".key", ".p12", ".pfx", ".crt", ".cer",  # Certificates/keys
        # Binary/media files
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg", ".webp",
        ".mp3", ".mp4", ".wav", ".avi", ".mov", ".mkv",
        ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
        ".zip", ".tar", ".gz", ".rar", ".7z",
        ".woff", ".woff2", ".ttf", ".eot", ".otf",
        ".db", ".sqlite", ".sqlite3",
    }

    files_content = []
    file_list = []
    total_size = 0
    files_count = 0
    dirs_count = 0
    limits_reached = False

    for root, dirs, files in os.walk(dir_path, followlinks=False):
        # Check directory limit
        dirs_count += 1
        if dirs_count >= max_dirs:
            file_list.append(f"- (directory limit of {max_dirs} reached)")
            break

        # Filter out skip directories
        dirs[:] = [d for d in dirs if d not in skip_dirs and not d.startswith(".")]

        for filename in files:
            # Check file limit
            if files_count >= max_files:
                if not limits_reached:
                    file_list.append(f"- (file limit of {max_files} reached)")
                    limits_reached = True
                break

            if any(filename.endswith(ext) for ext in skip_extensions):
                continue

            filepath = Path(root) / filename
            rel_path = filepath.relative_to(dir_path)

            # TOCTOU-safe symlink check using lstat (doesn't follow symlinks)
            try:
                lstat_result = filepath.lstat()
                if lstat_result.st_mode & 0o170000 == 0o120000:  # S_IFLNK
                    file_list.append(f"- {rel_path} (skipped: symlink)")
                    continue
            except OSError:
                file_list.append(f"- {rel_path} (skipped: cannot stat)")
                continue

            # Skip sensitive files
            if filename in skip_files or filename.lower() in skip_files:
                file_list.append(f"- {rel_path} (skipped: sensitive file)")
                logger.info("Skipped sensitive file: %s", rel_path)
                continue

            try:
                # Use lstat result for size check (already have it)
                if lstat_result.st_size > max_file_size:
                    file_list.append(f"- {rel_path} (skipped: file too large, {lstat_result.st_size} bytes)")
                    continue

                content = filepath.read_text(encoding="utf-8", errors="ignore")
                # Use bytes for consistent size calculation
                content_bytes = len(content.encode("utf-8"))

                if total_size + content_bytes > max_size:
                    file_list.append(f"- {rel_path} (skipped: size limit reached)")
                    continue

                total_size += content_bytes
                files_count += 1
                file_list.append(f"- {rel_path} ({content_bytes} bytes)")
                files_content.append(f"### File: {rel_path}\n```\n{content}\n```")
            except Exception as e:
                logger.debug("Error reading %s: %s", rel_path, e)
                file_list.append(f"- {rel_path} (error reading file)")

        # Break outer loop if file limit reached
        if limits_reached:
            break

    return file_list, files_content, total_size
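
# Usage sketch (illustrative path): returns the human-readable listing, the
# collected file contents, and the total byte count:
#   file_list, files_content, total = _discover_reviewable_files(Path("/home/user/Projects/app"))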


def _generate_review_markdown(
    project_name: str,
    provider: str,
    model: str,
    recommendation: str,
    issues_text: str,
    file_list: list[str],
) -> str:
    """Generate structured markdown for code review output."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    files_section = "\n".join(file_list)

    return f"""# Code Review: {project_name}

**Provider:** {provider} ({model})
**Date:** {timestamp}

## Review

{issues_text}

## Files Reviewed

{files_section}
"""


@mcp.tool()
async def code_review(
    directory: str,
    focus_areas: list[str] | None = None,
    provider: Provider = "deepseek",
    model: str = "",
    save_to: str = "",
    fast: bool = False,
) -> str:
    """
    Perform a systematic code review of a directory.

    Args:
        directory: Path to the directory to review
        focus_areas: Optional list of areas to focus on (e.g., ["security", "performance"])
        provider: Model to use - "deepseek" (default), "moonshot", or "openrouter"
        model: Optional model override (e.g., "kimi-k2-0905-preview" for moonshot)
        save_to: Optional directory path to save review as {project_name}_{provider}_review.md
        fast: If True, ignored for code reviews (always uses maximum thinking)

    Returns:
        Structured code review report with severity-classified issues
    """
    if err := _validate_path_input(directory)[0]:
        return f"Error: {err}"
    if focus_areas:
        if len(focus_areas) > MAX_FOCUS_AREAS:
            return f"Error: Too many focus areas (max {MAX_FOCUS_AREAS})"
    provider, err = _validate_provider(provider)
    if err:
        return err

    if model and (model_err := _validate_model(provider, model)):
        return model_err

    # CRITICAL: Code reviews ALWAYS use maximum thinking - fast parameter ignored
    if fast:
        logger.warning("code_review ignores fast=True to ensure thorough analysis")

    dir_path = Path(directory).resolve()
    if not dir_path.exists():
        return f"Error: Directory '{directory}' does not exist."
    if not dir_path.is_dir():
        return f"Error: '{directory}' is not a directory."

    # Security: validate directory is in allowed roots
    if not _is_safe_directory(dir_path):
        logger.warning("Blocked code_review of unsafe directory: %s", dir_path)
        return (
            "Error: Directory not in allowed roots. "
            "Set SECOND_OPINION_ALLOWED_ROOTS environment variable to customize "
            f"(current roots: {[str(p) for p in ALLOWED_ROOTS]})."
        )

    logger.info("Starting code review of %s", dir_path)

    # Use larger max_size for moonshot (256K context window)
    config = MODELS[provider]
    max_size = 400_000 if config.get("context_window", 0) >= 262144 else 100_000

    file_list, files_content, total_size = _discover_reviewable_files(dir_path, max_size=max_size)

    if not files_content:
        return f"No reviewable files found in '{directory}'."

    logger.info("Reviewing %d files, %d bytes total", len(files_content), total_size)

    # Force best model for reviews
    review_model = model
    if not review_model:
        if provider == "deepseek":
            review_model = "deepseek-reasoner"  # Always use reasoning model
        elif provider == "moonshot":
            # Use kimi-k2-0905-preview for code reviews:
            # - Optimized for agentic coding tasks
            # - Accepts temperature=0.0 for precision (unlike kimi-k2.5 which is fixed)
            # - No streaming requirement
            review_model = "kimi-k2-0905-preview"

    # Build the review prompt
    focus_instruction = ""
    if focus_areas:
        focus_instruction = f"\n\nFocus especially on: {', '.join(focus_areas)}"

    system = f"""You are a senior code reviewer performing a systematic review.
Analyze the codebase and produce a structured report.{focus_instruction}

Classify issues by severity:
- BLOCKER: Must fix before deployment (security vulnerabilities, data loss risks)
- CRITICAL: Should fix soon (significant bugs, major performance issues)
- MAJOR: Should fix (code smells, maintainability issues)
- MINOR: Nice to fix (style issues, minor improvements)

For each issue, provide:
- File and line number (if applicable)
- Clear description of the problem
- Suggested fix

End with a summary and recommendation: approve, request changes, or reject."""

    file_listing = "\n".join(file_list)
    file_contents = "\n".join(files_content)
    prompt = f"""## Project Structure
{file_listing}

## Files to Review
{file_contents}

Please provide a comprehensive code review."""

    # Temperature 0.0: maximum precision for code analysis
    # fast=False: always use thinking/reasoning mode for thorough review
    result = await call_model(
        provider, prompt, system, model=review_model,
        fast=False, temperature=0.0
    )

    # Save to file if requested
    if save_to:
        try:
            save_path = Path(save_to)
            save_path.mkdir(parents=True, exist_ok=True)
            project_name = dir_path.name
            # Record the model actually used, not just the config default
            model_name = review_model or config["model"]
            filename = f"{project_name}_{provider}_review.md"
            filepath = save_path / filename

            # Write structured markdown
            md_content = _generate_review_markdown(
                project_name, provider, model_name, "", result, file_list
            )
            filepath.write_text(md_content, encoding="utf-8")
            logger.info("Saved review to %s", filepath)
            result = f"Review saved to: {filepath}\n\n{result}"
        except Exception as e:
            logger.error("Failed to save review: %s", e)
            result = f"Warning: Could not save review ({e})\n\n{result}"

    return result
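
# Usage sketch (illustrative MCP call; the path must be under an allowed root):
#   report = await code_review("/home/user/Projects/app",
#                              focus_areas=["security"], save_to="/home/user/reviews")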


def _truncate_for_context(text: str, max_chars: int = 8000) -> str:
    """Truncate text to fit in context, keeping beginning and end."""
    if len(text) <= max_chars:
        return text
    half = max_chars // 2 - 50
    return f"{text[:half]}\n\n[... truncated {len(text) - 2 * half} chars ...]\n\n{text[-half:]}"


@mcp.tool()
async def consensus(
    topic: str,
    stances: list[str] | None = None,
    rounds: int = 2,
    include_reasoning: bool = True,
) -> str:
    """
    Multi-model debate on a topic. DeepSeek and Moonshot argue different stances,
    then a synthesis is produced.

    Args:
        topic: The topic to debate
        stances: Optional list of 2 stances [stance_a, stance_b]. If not provided,
            defaults to ["argue for", "argue against"]
        rounds: Number of debate rounds (1-3, default 2)
        include_reasoning: If True, include reasoning from thinking models in transcript

    Returns:
        Structured debate transcript with synthesis
    """
    if err := _validate_prompt_input(topic)[0]:
        return f"Error: {err}"
    # Validate stances parameter
    if stances is not None and len(stances) < 2:
        return "Error: stances must contain at least 2 items."

    rounds = max(1, min(3, rounds))  # Clamp to 1-3

    if stances and len(stances) >= 2:
        stance_a, stance_b = stances[0], stances[1]
    else:
        stance_a, stance_b = "argue for", "argue against"

    # Determine which providers to use
    provider_a = "deepseek" if MODELS["deepseek"].get("enabled") else "moonshot"
    provider_b = "moonshot" if MODELS["moonshot"].get("enabled") else "deepseek"

    # Validate at least one provider is available
    _, err_a = _validate_provider(provider_a)
    _, err_b = _validate_provider(provider_b)
    if err_a and err_b:
        return "Error: No providers available for consensus debate. Enable deepseek or moonshot."

    output_parts = [f"## Debate: {topic}\n"]

    # Round 1 - Initial arguments (parallel)
    output_parts.append("### Round 1 - Initial Arguments\n")

    prompt_a = f"Topic: {topic}\n\nYour stance: {stance_a}\n\nProvide a clear, well-reasoned argument for your stance. Be specific and cite concrete reasons."
    prompt_b = f"Topic: {topic}\n\nYour stance: {stance_b}\n\nProvide a clear, well-reasoned argument for your stance. Be specific and cite concrete reasons."

    # Temperature 1.3: debate benefits from diverse, creative responses
    try:
        results = await asyncio.gather(
            call_model(provider_a, prompt_a, include_reasoning=include_reasoning, temperature=1.3),
            call_model(provider_b, prompt_b, include_reasoning=include_reasoning, temperature=1.3),
            return_exceptions=True,
        )
    except Exception as e:
        logger.error("Consensus parallel calls failed: %s", e)
        return f"Error: Parallel API calls failed. {e}"

    # Handle individual failures gracefully
    arg_a = results[0] if not isinstance(results[0], Exception) else f"[{provider_a} unavailable: {results[0]}]"
    arg_b = results[1] if not isinstance(results[1], Exception) else f"[{provider_b} unavailable: {results[1]}]"

    # Check for API errors in responses (string errors from call_model)
    both_failed = (
        (isinstance(results[0], Exception) or arg_a.startswith("Error:")) and
        (isinstance(results[1], Exception) or arg_b.startswith("Error:"))
    )
    if both_failed:
        return f"## Debate: {topic}\n\n### Round 1 Failed\n\n{provider_a.title()}: {arg_a}\n\n{provider_b.title()}: {arg_b}"

    output_parts.append(f"**{provider_a.title()}** (stance: {stance_a}):\n{arg_a}\n")
    output_parts.append(f"**{provider_b.title()}** (stance: {stance_b}):\n{arg_b}\n")

    # Round 2+ - Rebuttals (sequential to reference previous arguments)
    prev_a, prev_b = arg_a, arg_b

    for round_num in range(2, rounds + 1):
        output_parts.append(f"### Round {round_num} - Rebuttals\n")

        rebuttal_prompt_a = f"""Topic: {topic}
Your stance: {stance_a}
Your previous argument: {_truncate_for_context(prev_a)}

Your opponent ({provider_b.title()}) argued:
{_truncate_for_context(prev_b)}

Rebut their argument while strengthening your position. Address their specific points."""

        try:
            rebuttal_a = await call_model(provider_a, rebuttal_prompt_a, include_reasoning=include_reasoning, temperature=1.3)
            # If API returned error, use previous argument as fallback
            if rebuttal_a.startswith("Error:"):
                logger.warning("%s returned error in round %d, using fallback", provider_a, round_num)
                rebuttal_a = prev_a
        except Exception as e:
            logger.error("Consensus rebuttal A failed in round %d: %s", round_num, e)
            rebuttal_a = f"[{provider_a.title()} unavailable in round {round_num}]"

        rebuttal_prompt_b = f"""Topic: {topic}
Your stance: {stance_b}
Your previous argument: {_truncate_for_context(prev_b)}

Your opponent ({provider_a.title()}) argued:
{_truncate_for_context(prev_a)}

Then they rebutted with:
{_truncate_for_context(rebuttal_a)}

Rebut their arguments while strengthening your position. Address their specific points."""

        try:
            rebuttal_b = await call_model(provider_b, rebuttal_prompt_b, include_reasoning=include_reasoning, temperature=1.3)
            # If API returned error, use previous argument as fallback
            if rebuttal_b.startswith("Error:"):
                logger.warning("%s returned error in round %d, using fallback", provider_b, round_num)
                rebuttal_b = prev_b
        except Exception as e:
            logger.error("Consensus rebuttal B failed in round %d: %s", round_num, e)
            rebuttal_b = f"[{provider_b.title()} unavailable in round {round_num}]"

        output_parts.append(f"**{provider_a.title()}** rebuttal:\n{rebuttal_a}\n")
        output_parts.append(f"**{provider_b.title()}** rebuttal:\n{rebuttal_b}\n")

        prev_a, prev_b = rebuttal_a, rebuttal_b

    # Synthesis - use DeepSeek (or first available provider)
    output_parts.append("### Consensus Synthesis\n")

    synthesis_prompt = f"""You are a neutral synthesizer. Given this debate, produce a balanced conclusion.

Topic: {topic}

{provider_a.title()} argued ({stance_a}):
{prev_a}

{provider_b.title()} argued ({stance_b}):
{prev_b}

Synthesize the strongest points from both sides into a nuanced conclusion. Acknowledge trade-offs and context-dependent factors. Do not declare a "winner" - instead, identify when each perspective is most applicable."""

    # Synthesis uses balanced temperature (1.0)
    try:
        synthesis = await call_model(provider_a, synthesis_prompt, temperature=1.0)
|
|
1319
|
+
except Exception as e:
|
|
1320
|
+
logger.error("Consensus synthesis failed: %s", e)
|
|
1321
|
+
synthesis = "[Synthesis unavailable - API call failed]"
|
|
1322
|
+
|
|
1323
|
+
output_parts.append(synthesis)
|
|
1324
|
+
|
|
1325
|
+
return "\n".join(output_parts)
|
|
1326
|
+
|
|
1327
|
+
|
|
1328
|
+
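
# Illustrative usage sketch (editor's addition, assumed, not part of the
# published wheel): @mcp.tool() leaves the coroutine directly callable, as
# review_synthesis below demonstrates when it awaits consensus() itself, so a
# host script can drive a debate without an MCP client. The topic and stances
# here are placeholder assumptions.
async def _example_consensus_usage() -> str:
    return await consensus(
        topic="Should this service adopt gRPC instead of REST?",
        stances=["argue for gRPC", "argue for REST"],
        rounds=2,
        include_reasoning=False,
    )
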
@mcp.tool()
async def review_synthesis(
    directory: str,
    focus_areas: list[str] | None = None,
    save_to: str = "",
    debate_rounds: int = 2,
) -> str:
    """
    Full code review synthesis workflow:
    1. Run parallel code reviews from DeepSeek and Moonshot
    2. Conduct consensus debate on findings
    3. Return synthesis instructions for Claude Code (Opus 4.5 arbiter)

    Args:
        directory: Path to the directory to review
        focus_areas: Optional list of areas to focus on (e.g., ["security", "performance"])
        save_to: Directory path to save all output files (required for full workflow)
        debate_rounds: Number of debate rounds (1-3, default 2)

    Creates files in save_to directory:
        - {project}_deepseek_review.md
        - {project}_moonshot_review.md
        - {project}_debate_transcript.md

    Returns:
        Synthesis instructions for Claude Code to compile final review
    """
    if err := _validate_path_input(directory)[0]:
        return f"Error: {err}"
    dir_path = Path(directory).resolve()
    if not dir_path.exists():
        return f"Error: Directory '{directory}' does not exist."
    if not dir_path.is_dir():
        return f"Error: '{directory}' is not a directory."

    # Security check
    if not _is_safe_directory(dir_path):
        return (
            "Error: Directory not in allowed roots. "
            f"Set SECOND_OPINION_ALLOWED_ROOTS (current: {[str(p) for p in ALLOWED_ROOTS]})."
        )

    project_name = dir_path.name

    # Check provider availability
    deepseek_enabled = MODELS["deepseek"].get("enabled")
    moonshot_enabled = MODELS["moonshot"].get("enabled")

    if not deepseek_enabled and not moonshot_enabled:
        return "Error: At least one of deepseek or moonshot must be enabled for review_synthesis."

    # Step 1: Parallel code reviews
    logger.info("Starting parallel code reviews for %s", project_name)

    reviews = {}
    tasks = []
    providers_used = []

    if deepseek_enabled:
        # deepseek-reasoner: maximum thinking for thorough review
        tasks.append(code_review(
            directory, focus_areas, provider="deepseek",
            model="deepseek-reasoner", save_to=save_to
        ))
        providers_used.append("deepseek")

    if moonshot_enabled:
        # kimi-k2.5: deep thinking enabled by default for thorough review
        tasks.append(code_review(
            directory, focus_areas, provider="moonshot",
            model="kimi-k2.5", save_to=save_to
        ))
        providers_used.append("moonshot")

    results = await asyncio.gather(*tasks, return_exceptions=True)

    for provider, result in zip(providers_used, results):
        if isinstance(result, Exception):
            reviews[provider] = f"Error: {result}"
            logger.error("Review from %s failed: %s", provider, result)
        else:
            reviews[provider] = result

    # Check for critical failures
    errors = [p for p, r in reviews.items() if r.startswith("Error:")]
    if len(errors) == len(providers_used):
        return "Error: All code reviews failed.\n\n" + "\n\n".join(
            f"**{p}:** {reviews[p]}" for p in providers_used
        )

    # Step 2: Consensus debate on findings (if both providers succeeded)
    debate_transcript = ""
    if len(providers_used) == 2 and not errors:
        logger.info("Starting consensus debate on review findings")

        # Extract key findings for debate
        deepseek_summary = _truncate_for_context(reviews.get("deepseek", ""), 4000)
        moonshot_summary = _truncate_for_context(reviews.get("moonshot", ""), 4000)

        debate_transcript = await consensus(
            topic=f"Code review findings for {project_name}",
            stances=[
                f"DeepSeek's review findings are more accurate: {deepseek_summary[:500]}...",
                f"Moonshot's review findings are more accurate: {moonshot_summary[:500]}...",
            ],
            rounds=debate_rounds,
            include_reasoning=True,
        )

        # Save debate transcript
        if save_to:
            try:
                save_path = Path(save_to)
                debate_file = save_path / f"{project_name}_debate_transcript.md"
                debate_file.write_text(debate_transcript, encoding="utf-8")
                logger.info("Saved debate transcript to %s", debate_file)
            except Exception as e:
                logger.error("Failed to save debate transcript: %s", e)

    # Step 3: Return synthesis instructions for Claude Code
    files_created = []
    if save_to:
        save_path = Path(save_to)
        if deepseek_enabled:
            files_created.append(f"`{save_path / f'{project_name}_deepseek_review.md'}`")
        if moonshot_enabled:
            files_created.append(f"`{save_path / f'{project_name}_moonshot_review.md'}`")
        if debate_transcript:
            files_created.append(f"`{save_path / f'{project_name}_debate_transcript.md'}`")

    # Build summary sections
    deepseek_section = ""
    if "deepseek" in reviews:
        # Truncate to ~2000 chars as a summary
        deepseek_section = f"""### DeepSeek Review Summary

{_truncate_for_context(reviews['deepseek'], 2000)}
"""

    moonshot_section = ""
    if "moonshot" in reviews:
        moonshot_section = f"""### Moonshot Review Summary

{_truncate_for_context(reviews['moonshot'], 2000)}
"""

    debate_section = ""
    if debate_transcript:
        debate_section = f"""### Debate Key Points

{_truncate_for_context(debate_transcript, 2000)}
"""

    synthesis_output = f"""## Code Review Synthesis Request

Parallel code reviews and consensus debate completed for **{project_name}**.

### Files Created

{chr(10).join(f'- {f}' for f in files_created) if files_created else '(No files saved - set save_to parameter to save reviews)'}

{deepseek_section}
{moonshot_section}
{debate_section}

### Synthesis Instructions

Please synthesize these reviews into a final authoritative code review document:

1. Identify issues confirmed by both reviewers (highest confidence)
2. Resolve disagreements using the debate findings
3. Remove likely false positives identified in debate
4. Prioritize by actual severity and impact
5. Provide actionable fix recommendations

{f'Save the final synthesis to: `{Path(save_to) / f"{project_name}_final_review.md"}`' if save_to else ''}
"""

    return synthesis_output

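
# Illustrative usage sketch (editor's addition, assumed, not shipped code):
# driving the full review workflow from a script. Both paths are placeholders
# and must resolve under SECOND_OPINION_ALLOWED_ROOTS, or the
# _is_safe_directory() check above rejects the request before any API call.
async def _example_review_synthesis_usage() -> str:
    return await review_synthesis(
        directory="/home/user/projects/myproject",
        focus_areas=["security", "performance"],
        save_to="/home/user/projects/myproject/reviews",
        debate_rounds=2,
    )
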
def _sync_cleanup():
    """Synchronous wrapper for cleanup on exit."""
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running() or loop.is_closed():
            logger.debug("Event loop unavailable at shutdown")
            return
        loop.run_until_complete(cleanup_clients())
    except RuntimeError as e:
        logger.debug("Cleanup skipped (no event loop): %s", e)
    except Exception as e:
        logger.warning("Cleanup failed: %s: %s", type(e).__name__, e)


atexit.register(_sync_cleanup)

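
# Illustrative sketch (editor's addition, assumed, not shipped code): the
# atexit hook above deliberately skips cleanup while an event loop is still
# running or already closed, so a host that embeds this module in its own
# loop should await the async cleanup directly during its shutdown path.
async def _example_explicit_shutdown() -> None:
    # ... host awaits tools, serves traffic, etc. ...
    await cleanup_clients()  # close provider clients before loop teardown
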
def main():
    """Run the MCP server with stdio transport."""
    # Configure logging when run directly (not when imported as a library)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    mcp.run(transport="stdio")


if __name__ == "__main__":
    main()
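
# Illustrative client-side smoke test (editor's addition; the client API and
# the console-script name are assumptions based on the official MCP Python
# SDK and this package's entry point, so verify both before use). Kept as a
# comment so importing this module never executes it:
#
#   import asyncio
#   from mcp import ClientSession, StdioServerParameters
#   from mcp.client.stdio import stdio_client
#
#   async def smoke_test() -> None:
#       params = StdioServerParameters(command="second-opinion-mcp", args=[])
#       async with stdio_client(params) as (read, write):
#           async with ClientSession(read, write) as session:
#               await session.initialize()
#               result = await session.call_tool(
#                   "consensus", {"topic": "Tabs versus spaces", "rounds": 1}
#               )
#               print(result)
#
#   asyncio.run(smoke_test())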