devguard 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devguard/INTEGRATION_SUMMARY.md +121 -0
- devguard/__init__.py +3 -0
- devguard/__main__.py +6 -0
- devguard/checkers/__init__.py +41 -0
- devguard/checkers/api_usage.py +523 -0
- devguard/checkers/aws_cost.py +331 -0
- devguard/checkers/aws_iam.py +284 -0
- devguard/checkers/base.py +25 -0
- devguard/checkers/container.py +137 -0
- devguard/checkers/domain.py +189 -0
- devguard/checkers/firecrawl.py +117 -0
- devguard/checkers/fly.py +225 -0
- devguard/checkers/github.py +210 -0
- devguard/checkers/npm.py +327 -0
- devguard/checkers/npm_security.py +244 -0
- devguard/checkers/redteam.py +290 -0
- devguard/checkers/secret.py +279 -0
- devguard/checkers/swarm.py +376 -0
- devguard/checkers/tailscale.py +143 -0
- devguard/checkers/tailsnitch.py +303 -0
- devguard/checkers/tavily.py +179 -0
- devguard/checkers/vercel.py +192 -0
- devguard/cli.py +1510 -0
- devguard/cli_helpers.py +189 -0
- devguard/config.py +249 -0
- devguard/core.py +293 -0
- devguard/dashboard.py +715 -0
- devguard/discovery.py +363 -0
- devguard/http_client.py +142 -0
- devguard/llm_service.py +481 -0
- devguard/mcp_server.py +259 -0
- devguard/metrics.py +144 -0
- devguard/models.py +208 -0
- devguard/reporting.py +1571 -0
- devguard/sarif.py +295 -0
- devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
- devguard/scripts/README.md +221 -0
- devguard/scripts/auto_fix_recommendations.py +145 -0
- devguard/scripts/generate_npmignore.py +175 -0
- devguard/scripts/generate_security_report.py +324 -0
- devguard/scripts/prepublish_check.sh +29 -0
- devguard/scripts/redteam_npm_packages.py +1262 -0
- devguard/scripts/review_all_repos.py +300 -0
- devguard/spec.py +617 -0
- devguard/sweeps/__init__.py +23 -0
- devguard/sweeps/ai_editor_config_audit.py +697 -0
- devguard/sweeps/cargo_publish_audit.py +655 -0
- devguard/sweeps/dependency_audit.py +419 -0
- devguard/sweeps/gitignore_audit.py +336 -0
- devguard/sweeps/local_dev.py +260 -0
- devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
- devguard/sweeps/project_flaudit.py +636 -0
- devguard/sweeps/public_github_secrets.py +680 -0
- devguard/sweeps/publish_audit.py +478 -0
- devguard/sweeps/ssh_key_audit.py +327 -0
- devguard/utils.py +174 -0
- devguard-0.2.0.dist-info/METADATA +225 -0
- devguard-0.2.0.dist-info/RECORD +60 -0
- devguard-0.2.0.dist-info/WHEEL +4 -0
- devguard-0.2.0.dist-info/entry_points.txt +2 -0
devguard/llm_service.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
"""LLM service for Guardian judgements and content generation."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from devguard.config import Settings
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LLMService:
|
|
13
|
+
"""Service for LLM-powered judgements and content generation."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, settings: Settings):
    """Initialize LLM service with settings.

    Args:
        settings: Application settings carrying the provider API keys
            (anthropic/openai/openrouter); read lazily by ``_get_client``.
    """
    self.settings = settings
    # Lazily-built (provider_name, client) tuple; stays None until
    # _get_client() succeeds, then is reused on every later call.
    self._client = None
|
|
19
|
+
|
|
20
|
+
def _get_client(self):
|
|
21
|
+
"""Get LLM client (Anthropic, OpenAI, or OpenRouter)."""
|
|
22
|
+
if self._client is not None:
|
|
23
|
+
return self._client
|
|
24
|
+
|
|
25
|
+
# Prefer Anthropic if available
|
|
26
|
+
if self.settings.anthropic_api_key:
|
|
27
|
+
try:
|
|
28
|
+
import anthropic
|
|
29
|
+
|
|
30
|
+
self._client = (
|
|
31
|
+
"anthropic",
|
|
32
|
+
anthropic.Anthropic(api_key=str(self.settings.anthropic_api_key)),
|
|
33
|
+
)
|
|
34
|
+
return self._client
|
|
35
|
+
except ImportError:
|
|
36
|
+
logger.debug("anthropic package not installed")
|
|
37
|
+
except Exception as e:
|
|
38
|
+
logger.debug(f"Failed to initialize Anthropic client: {e}")
|
|
39
|
+
|
|
40
|
+
# Fallback to OpenAI
|
|
41
|
+
if self.settings.openai_api_key:
|
|
42
|
+
try:
|
|
43
|
+
import openai
|
|
44
|
+
|
|
45
|
+
self._client = ("openai", openai.OpenAI(api_key=str(self.settings.openai_api_key)))
|
|
46
|
+
return self._client
|
|
47
|
+
except ImportError:
|
|
48
|
+
logger.debug("openai package not installed")
|
|
49
|
+
except Exception as e:
|
|
50
|
+
logger.debug(f"Failed to initialize OpenAI client: {e}")
|
|
51
|
+
|
|
52
|
+
# Fallback to OpenRouter
|
|
53
|
+
if self.settings.openrouter_api_key:
|
|
54
|
+
try:
|
|
55
|
+
import openai
|
|
56
|
+
|
|
57
|
+
self._client = (
|
|
58
|
+
"openrouter",
|
|
59
|
+
openai.OpenAI(
|
|
60
|
+
api_key=str(self.settings.openrouter_api_key),
|
|
61
|
+
base_url="https://openrouter.ai/api/v1",
|
|
62
|
+
),
|
|
63
|
+
)
|
|
64
|
+
return self._client
|
|
65
|
+
except ImportError:
|
|
66
|
+
logger.debug("openai package not installed for OpenRouter")
|
|
67
|
+
except Exception as e:
|
|
68
|
+
logger.debug(f"Failed to initialize OpenRouter client: {e}")
|
|
69
|
+
|
|
70
|
+
return None
|
|
71
|
+
|
|
72
|
+
async def should_send_email(
    self, report: dict[str, Any], email_history: list[dict[str, Any]]
) -> dict[str, Any]:
    """Use LLM to determine if email should be sent based on report and history.

    Falls back to the deterministic rule-based heuristic when no LLM
    client is configured or the LLM call/parse fails.

    Returns:
        {
            "should_send": bool,
            "reasoning": str,
            "priority": "critical" | "high" | "medium" | "low",
            "summary": str
        }
    """
    summary = report.get("summary", {})

    def _fallback(reason: str, summary_text: str) -> dict[str, Any]:
        # Shared fallback shape for both the "no client" and "LLM error" paths.
        return {
            "should_send": self._rule_based_should_send(report),
            "reasoning": reason,
            "priority": "high" if summary.get("critical_vulnerabilities", 0) > 0 else "medium",
            "summary": summary_text,
        }

    client_info = self._get_client()
    if not client_info:
        return _fallback("LLM not available, using rule-based decision", "Rule-based analysis")

    provider, client = client_info

    # Describe up to the five most recent alerts so the model can weigh
    # alert fatigue against urgency.
    history_lines: list[str] = []
    for entry in (email_history or [])[-5:]:
        history_lines.append(
            f"- {entry.get('timestamp', 'unknown')}: {entry.get('subject', 'N/A')}\n"
        )
        history_lines.append(f"  Issues: {entry.get('summary', {})}\n")
    history_context = (
        "\nRecent email history:\n" + "".join(history_lines) if history_lines else ""
    )

    prompt = f"""You are a security operations analyst deciding whether to send an alert email.

Current report summary:
- Critical vulnerabilities: {summary.get("critical_vulnerabilities", 0)}
- High priority findings: {summary.get("high_findings", 0)}
- Critical findings: {summary.get("critical_findings", 0)}
- Unhealthy deployments: {summary.get("unhealthy_deployments", 0)}
- Failed checks: {summary.get("failed_checks", 0)}
- Total vulnerabilities: {summary.get("total_vulnerabilities", 0)}

{history_context}

Top issues:
{json.dumps(report.get("issues", {}), indent=2)[:1000]}

Analyze whether an email alert should be sent. Consider:
1. Severity and urgency of issues
2. Whether similar issues were recently reported (avoid alert fatigue)
3. Whether issues are new or ongoing
4. Business impact

Respond with JSON:
{{
    "should_send": true/false,
    "reasoning": "brief explanation",
    "priority": "critical" | "high" | "medium" | "low",
    "summary": "one sentence executive summary"
}}"""

    try:
        if provider == "anthropic":
            reply = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=500,
                messages=[{"role": "user", "content": prompt}],
            )
            content = reply.content[0].text
        elif provider in ("openai", "openrouter"):
            chosen = "gpt-4o-mini" if provider == "openai" else "anthropic/claude-3.5-sonnet"
            reply = client.chat.completions.create(
                model=chosen,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"} if provider == "openai" else None,
            )
            content = reply.choices[0].message.content
        else:
            raise ValueError(f"Unknown provider: {provider}")

        # The model is instructed to answer with a JSON object; default to
        # sending when any field is missing.
        parsed = json.loads(content)
        return {
            "should_send": parsed.get("should_send", True),
            "reasoning": parsed.get("reasoning", "LLM analysis"),
            "priority": parsed.get("priority", "medium"),
            "summary": parsed.get("summary", ""),
        }
    except Exception as e:
        logger.warning(f"LLM decision failed: {e}, falling back to rule-based")
        return _fallback(f"LLM error: {str(e)}", "Rule-based fallback")
|
|
176
|
+
|
|
177
|
+
def _rule_based_should_send(self, report: dict[str, Any]) -> bool:
|
|
178
|
+
"""Fallback rule-based decision."""
|
|
179
|
+
summary = report.get("summary", {})
|
|
180
|
+
return (
|
|
181
|
+
summary.get("critical_vulnerabilities", 0) > 0
|
|
182
|
+
or summary.get("critical_findings", 0) > 0
|
|
183
|
+
or summary.get("high_findings", 0) > 0
|
|
184
|
+
or summary.get("unhealthy_deployments", 0) > 0
|
|
185
|
+
or summary.get("failed_checks", 0) > 0
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
async def generate_subject_line(self, report: dict[str, Any], priority: str = "medium") -> str:
    """Generate contextual subject line using LLM.

    Falls back to a template-based subject when no LLM client is
    configured or the call fails.
    """
    client_info = self._get_client()
    if not client_info:
        return self._generate_subject_fallback(report)

    provider, client = client_info

    prompt = f"""Generate a concise, actionable email subject line for a security monitoring alert.

Report summary:
- Priority: {priority}
- Critical vulnerabilities: {report.get("summary", {}).get("critical_vulnerabilities", 0)}
- High findings: {report.get("summary", {}).get("high_findings", 0)}
- Unhealthy deployments: {report.get("summary", {}).get("unhealthy_deployments", 0)}

Top issues:
{json.dumps(report.get("issues", {}), indent=2)[:500]}

Generate a subject line that:
1. Starts with "Guardian Security Report -"
2. Indicates urgency level
3. Highlights the most critical issue(s)
4. Is under 100 characters
5. Is actionable and specific

Respond with ONLY the subject line, no quotes or explanation."""

    try:
        if provider == "anthropic":
            raw = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=100,
                messages=[{"role": "user", "content": prompt}],
            ).content[0].text
        elif provider in ("openai", "openrouter"):
            chosen_model = "gpt-4o-mini" if provider == "openai" else "anthropic/claude-3.5-sonnet"
            raw = client.chat.completions.create(
                model=chosen_model, messages=[{"role": "user", "content": prompt}], max_tokens=100
            ).choices[0].message.content
        else:
            raise ValueError(f"Unknown provider: {provider}")

        # Models sometimes wrap the subject in quotes despite the instructions.
        subject = raw.strip().strip('"').strip("'")

        # Ensure the canonical prefix survives whatever the model emitted.
        if not subject.startswith("Guardian Security Report"):
            subject = f"Guardian Security Report - {subject}"

        return subject[:120]  # Safety limit
    except Exception as e:
        logger.warning(f"LLM subject generation failed: {e}, using fallback")
        return self._generate_subject_fallback(report)
|
|
241
|
+
|
|
242
|
+
def _generate_subject_fallback(self, report: dict[str, Any]) -> str:
|
|
243
|
+
"""Fallback subject line generation."""
|
|
244
|
+
summary = report.get("summary", {})
|
|
245
|
+
critical = summary.get("critical_vulnerabilities", 0)
|
|
246
|
+
unhealthy = summary.get("unhealthy_deployments", 0)
|
|
247
|
+
|
|
248
|
+
if critical > 0 or unhealthy > 0:
|
|
249
|
+
return f"Guardian Security Report - URGENT: {critical} critical, {unhealthy} unhealthy"
|
|
250
|
+
elif summary.get("total_vulnerabilities", 0) > 0:
|
|
251
|
+
return f"Guardian Security Report - ALERT: {summary.get('total_vulnerabilities', 0)} vulnerabilities"
|
|
252
|
+
else:
|
|
253
|
+
return "Guardian Security Report - Status: All systems healthy"
|
|
254
|
+
|
|
255
|
+
async def generate_executive_summary(
    self, report: dict[str, Any], priority: str = "medium"
) -> str:
    """Generate executive summary using LLM.

    Falls back to a template-based summary when no LLM client is
    configured or the call fails.
    """
    client_info = self._get_client()
    if not client_info:
        return self._generate_summary_fallback(report)

    provider, client = client_info

    prompt = f"""Generate a concise executive summary (2-3 sentences) for a security monitoring report.

Priority: {priority}

Report summary:
{json.dumps(report.get("summary", {}), indent=2)}

Top issues:
{json.dumps(report.get("issues", {}), indent=2)[:800]}

Write a brief, actionable summary that:
1. States the overall security posture
2. Highlights the most critical issues requiring attention
3. Provides context on urgency

Respond with ONLY the summary text, no markdown or formatting."""

    try:
        if provider == "anthropic":
            text = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=200,
                messages=[{"role": "user", "content": prompt}],
            ).content[0].text
        elif provider in ("openai", "openrouter"):
            picked_model = "gpt-4o-mini" if provider == "openai" else "anthropic/claude-3.5-sonnet"
            text = client.chat.completions.create(
                model=picked_model, messages=[{"role": "user", "content": prompt}], max_tokens=200
            ).choices[0].message.content
        else:
            raise ValueError(f"Unknown provider: {provider}")

        return text.strip()
    except Exception as e:
        logger.warning(f"LLM summary generation failed: {e}, using fallback")
        return self._generate_summary_fallback(report)
|
|
303
|
+
|
|
304
|
+
def _generate_summary_fallback(self, report: dict[str, Any]) -> str:
|
|
305
|
+
"""Fallback summary generation."""
|
|
306
|
+
summary = report.get("summary", {})
|
|
307
|
+
critical = summary.get("critical_vulnerabilities", 0)
|
|
308
|
+
unhealthy = summary.get("unhealthy_deployments", 0)
|
|
309
|
+
|
|
310
|
+
if critical > 0:
|
|
311
|
+
return f"Critical security issues detected: {critical} critical vulnerabilities and {unhealthy} unhealthy deployments require immediate attention."
|
|
312
|
+
elif unhealthy > 0:
|
|
313
|
+
return f"Infrastructure issues detected: {unhealthy} unhealthy deployments need investigation."
|
|
314
|
+
elif summary.get("total_vulnerabilities", 0) > 0:
|
|
315
|
+
return f"Security vulnerabilities detected: {summary.get('total_vulnerabilities', 0)} total vulnerabilities found across monitored systems."
|
|
316
|
+
else:
|
|
317
|
+
return "All systems are operating normally with no critical security issues detected."
|
|
318
|
+
|
|
319
|
+
async def analyze_project_flaudit(
    self,
    prompt: str,
    model_id: str = "google/gemini-2.5-flash",
    severity_guidance: str | None = None,
    public_repo_mode: bool = False,
) -> str:
    """Analyze project files (README/impl/tests) for flaws via OpenRouter + Gemini.

    Prefers OpenRouter when model_id is a Google model (google/*) and
    openrouter_api_key is set. Falls back to Anthropic/OpenAI otherwise.

    severity_guidance: optional custom guidance; if unset, a default calibration is used.
    public_repo_mode: use a stricter prompt for public crates (higher bar for docs/API/quality).

    Returns raw LLM response text (JSON expected). On any failure this
    returns a JSON string of the shape {"findings": [], "error": ...}
    rather than raising.
    """
    # Default severity calibration injected into the system prompt when the
    # caller does not supply custom guidance.
    default_severity = (
        "Severity calibration: Reserve **critical** for security issues with a clear "
        "exploit path (e.g. command injection with **external** user-controlled input). "
        "Use **high** for correctness bugs or major doc/impl drift. Use **medium** for "
        "doc gaps, test coverage, refactor suggestions. Use **low** for style, minor "
        "duplication, or unverified concerns. Do NOT use critical for: internal scripts, "
        "trusted inputs, or theoretical risks without an exploit path."
    )
    severity_block = severity_guidance if severity_guidance else default_severity

    # Two prompt variants: a stricter auditor persona for public crates,
    # a milder one otherwise. Both demand the same JSON findings schema.
    if public_repo_mode:
        system_prompt = f"""You are a **critical** code quality auditor for **public** open-source crates. Your job is to be as strict as possible so maintainers can improve public-facing quality. Assume a first-time user will rely on the README and published API; any drift or missing step is a real failure.

Find flaws in these categories:
1. **readme_impl_drift**: README claims, quickstart steps, or API descriptions that do not match the implementation. Flag if the README example would not compile/run as written, or if documented functions/signatures are wrong or missing.
2. **readme_tests_mismatch**: Tests cover behavior not documented in README, or README describes behavior not tested. Public API surface should be both documented and tested.
3. **rules_violation**: Code or README disobeys project/workspace rules (e.g. no emojis, no marketing tone, truth boundary). Set **rule_ref** to the rule filename (e.g. user-core.mdc).
4. **other**: Missing or vague doc comments on public items, Cargo.toml/crate metadata inconsistent with README, security or safety considerations not mentioned, unclear error handling contract, or anything that would confuse or mislead a public user.

Be **critical**: prefer flagging a possible issue (medium/low) over missing a real one. If the README quickstart is incomplete (e.g. missing use statement or wrong path), that is at least **high**. If public API has no doc comment, that is at least **medium**. Do not be lenient because the crate is small.

{severity_block}

Respond with JSON only:
{{
    "findings": [
        {{
            "severity": "critical|high|medium|low",
            "category": "readme_impl_drift|readme_tests_mismatch|rules_violation|other",
            "description": "concise description of the flaw",
            "file_ref": "path or section reference if applicable",
            "suggestion": "optional fix suggestion",
            "rule_ref": "for rules_violation only: rule filename e.g. user-core.mdc"
        }}
    ]
}}

If no flaws found, return {{"findings": []}}.
Return at most 16 findings, prioritized by severity (critical > high > medium > low). Keep each description to one sentence. Be concrete: cite file paths and line references."""
    else:
        system_prompt = f"""You are a code quality auditor. Analyze the provided project files (README, implementation, tests, and optional rules).

Find flaws in these categories:
1. **readme_impl_drift**: README claims or describes behavior that does not match the implementation.
2. **readme_tests_mismatch**: Tests cover behavior not documented in README, or README describes behavior not tested.
3. **rules_violation**: Code or README disobeys project/workspace rules (e.g. invariants: no emojis, no marketing tone, truth boundary, etc.). When citing a rules_violation, set **rule_ref** to the rule filename (e.g. user-core.mdc).
4. **other**: Other quality issues (missing tests, unclear docs, etc.).

{severity_block}

Respond with JSON only:
{{
    "findings": [
        {{
            "severity": "critical|high|medium|low",
            "category": "readme_impl_drift|readme_tests_mismatch|rules_violation|other",
            "description": "concise description of the flaw",
            "file_ref": "path or section reference if applicable",
            "suggestion": "optional fix suggestion",
            "rule_ref": "for rules_violation only: rule filename e.g. user-core.mdc"
        }}
    ]
}}

If no flaws found, return {{"findings": []}}.
Return at most 12 findings, prioritized by severity (critical > high > medium > low). Keep each description to one sentence.
Be concrete: cite specific file paths and line references when possible."""

    # Prefer OpenRouter for Google models when key is available
    use_openrouter = (
        model_id.startswith("google/")
        and self.settings.openrouter_api_key is not None
    )
    if use_openrouter:
        try:
            import openai
            # NOTE: the key is unwrapped with .get_secret_value() here, which
            # implies it is a pydantic SecretStr; other call sites use str()
            # on the same settings fields — verify they agree.
            client = openai.OpenAI(
                api_key=str(self.settings.openrouter_api_key.get_secret_value()),
                base_url="https://openrouter.ai/api/v1",
            )
            kwargs = {
                "model": model_id,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": 8000,
            }
            # Try JSON mode first; fallback if model doesn't support it
            try:
                response = client.chat.completions.create(
                    **kwargs,
                    response_format={"type": "json_object"},
                )
            except Exception:
                response = client.chat.completions.create(**kwargs)
            return response.choices[0].message.content or "{}"
        except Exception as e:
            logger.warning(f"OpenRouter flaudit call failed: {e}")
            return json.dumps({"findings": [], "error": str(e)})

    # Non-Google model (or no OpenRouter key): use whichever provider the
    # generic client resolution picks.
    client_info = self._get_client()
    if not client_info:
        return json.dumps({"findings": [], "error": "No LLM API key configured"})

    provider, client = client_info
    try:
        if provider == "openrouter":
            response = client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=8000,
                response_format={"type": "json_object"},
            )
            return response.choices[0].message.content or "{}"
        elif provider == "anthropic":
            # Anthropic path folds the system prompt into the single user
            # message instead of using a system parameter.
            response = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=4000,
                messages=[
                    {"role": "user", "content": f"{system_prompt}\n\n---\n\n{prompt}"},
                ],
            )
            return response.content[0].text if response.content else "{}"
        elif provider == "openai":
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=4000,
            )
            return response.choices[0].message.content or "{}"
        else:
            return json.dumps({"findings": [], "error": f"Unknown provider: {provider}"})
    except Exception as e:
        logger.warning(f"Project flaudit LLM call failed: {e}")
        return json.dumps({"findings": [], "error": str(e)})
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
|