gitflow-analytics 1.3.6__py3-none-any.whl → 3.3.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/batch_classifier.py +156 -4
- gitflow_analytics/cli.py +897 -179
- gitflow_analytics/config/loader.py +40 -1
- gitflow_analytics/config/schema.py +4 -0
- gitflow_analytics/core/cache.py +20 -0
- gitflow_analytics/core/data_fetcher.py +1254 -228
- gitflow_analytics/core/git_auth.py +169 -0
- gitflow_analytics/core/git_timeout_wrapper.py +347 -0
- gitflow_analytics/core/metrics_storage.py +12 -3
- gitflow_analytics/core/progress.py +219 -18
- gitflow_analytics/core/subprocess_git.py +145 -0
- gitflow_analytics/extractors/ml_tickets.py +3 -2
- gitflow_analytics/extractors/tickets.py +93 -8
- gitflow_analytics/integrations/jira_integration.py +1 -1
- gitflow_analytics/integrations/orchestrator.py +47 -29
- gitflow_analytics/metrics/branch_health.py +3 -2
- gitflow_analytics/models/database.py +72 -1
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +12 -5
- gitflow_analytics/pm_framework/orchestrator.py +8 -3
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +24 -4
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +3 -1
- gitflow_analytics/qualitative/core/llm_fallback.py +34 -2
- gitflow_analytics/reports/narrative_writer.py +118 -74
- gitflow_analytics/security/__init__.py +11 -0
- gitflow_analytics/security/config.py +189 -0
- gitflow_analytics/security/extractors/__init__.py +7 -0
- gitflow_analytics/security/extractors/dependency_checker.py +379 -0
- gitflow_analytics/security/extractors/secret_detector.py +197 -0
- gitflow_analytics/security/extractors/vulnerability_scanner.py +333 -0
- gitflow_analytics/security/llm_analyzer.py +347 -0
- gitflow_analytics/security/reports/__init__.py +5 -0
- gitflow_analytics/security/reports/security_report.py +358 -0
- gitflow_analytics/security/security_analyzer.py +414 -0
- gitflow_analytics/tui/app.py +3 -1
- gitflow_analytics/tui/progress_adapter.py +313 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +407 -46
- gitflow_analytics/tui/screens/results_screen.py +219 -206
- gitflow_analytics/ui/__init__.py +21 -0
- gitflow_analytics/ui/progress_display.py +1477 -0
- gitflow_analytics/verify_activity.py +697 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/METADATA +2 -1
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/RECORD +47 -31
- gitflow_analytics/cli_rich.py +0 -503
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/top_level.txt +0 -0
gitflow_analytics/security/extractors/vulnerability_scanner.py (new file)

@@ -0,0 +1,333 @@
+"""Vulnerability scanning using multiple security tools and LLM analysis."""
+
+import json
+import logging
+import re
+import shutil
+import subprocess
+import tempfile
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class VulnerabilityScanner:
+    """Scan code for security vulnerabilities using tools and patterns."""
+
+    def __init__(self, config: Any):
+        """Initialize vulnerability scanner with configuration."""
+        self.config = config
+        self.vulnerability_patterns = {
+            name: re.compile(pattern) for name, pattern in config.vulnerability_patterns.items()
+        }
+
+        # Check which tools are available
+        self.available_tools = self._detect_available_tools()
+
+    def scan_files(self, files_changed: List[str], repo_path: Path) -> List[Dict]:
+        """Scan changed files for vulnerabilities.
+
+        Args:
+            files_changed: List of changed file paths
+            repo_path: Path to repository
+
+        Returns:
+            List of vulnerability findings
+        """
+        findings = []
+
+        # Quick pattern-based scanning
+        pattern_findings = self._scan_with_patterns(files_changed, repo_path)
+        findings.extend(pattern_findings)
+
+        # Tool-based scanning (run in parallel for performance)
+        if self.available_tools:
+            tool_findings = self._scan_with_tools(files_changed, repo_path)
+            findings.extend(tool_findings)
+
+        return findings
+
+    def _detect_available_tools(self) -> Dict[str, bool]:
+        """Detect which security tools are installed."""
+        tools = {}
+
+        # Check for Semgrep
+        if self.config.enable_semgrep:
+            tools["semgrep"] = self._is_tool_available("semgrep")
+            if not tools["semgrep"]:
+                logger.info("Semgrep not found. Install with: pip install semgrep")
+
+        # Check for Bandit (Python)
+        if self.config.enable_bandit:
+            tools["bandit"] = self._is_tool_available("bandit")
+            if not tools["bandit"]:
+                logger.info("Bandit not found. Install with: pip install bandit")
+
+        # Check for gosec (Go)
+        if self.config.enable_gosec:
+            tools["gosec"] = self._is_tool_available("gosec")
+            if not tools["gosec"]:
+                logger.info("Gosec not found. Install from: https://github.com/securego/gosec")
+
+        return tools
+
+    def _is_tool_available(self, tool_name: str) -> bool:
+        """Check if a tool is available in PATH."""
+        return shutil.which(tool_name) is not None
+
+    def _scan_with_patterns(self, files_changed: List[str], repo_path: Path) -> List[Dict]:
+        """Quick pattern-based vulnerability detection."""
+        findings = []
+
+        for file_path in files_changed:
+            full_path = repo_path / file_path
+            if not full_path.exists() or not full_path.is_file():
+                continue
+
+            try:
+                content = full_path.read_text(encoding="utf-8", errors="ignore")
+
+                for vuln_type, pattern in self.vulnerability_patterns.items():
+                    for match in pattern.finditer(content):
+                        line_num = content[: match.start()].count("\n") + 1
+                        finding = {
+                            "type": "vulnerability",
+                            "vulnerability_type": vuln_type,
+                            "severity": self._get_vuln_severity(vuln_type),
+                            "file": file_path,
+                            "line": line_num,
+                            "message": f"Potential {vuln_type.replace('_', ' ')} detected",
+                            "tool": "pattern_matcher",
+                            "confidence": "medium",
+                        }
+                        findings.append(finding)
+            except Exception as e:
+                logger.debug(f"Error scanning {file_path}: {e}")
+
+        return findings
+
+    def _scan_with_tools(self, files_changed: List[str], repo_path: Path) -> List[Dict]:
+        """Run security tools on changed files."""
+        all_findings = []
+
+        # Group files by language for efficient tool execution
+        files_by_language = self._group_files_by_language(files_changed)
+
+        with ThreadPoolExecutor(max_workers=4) as executor:
+            futures = []
+
+            # Run Semgrep if available (works on all languages)
+            if self.available_tools.get("semgrep"):
+                future = executor.submit(self._run_semgrep, files_changed, repo_path)
+                futures.append(("semgrep", future))
+
+            # Run Bandit on Python files
+            if self.available_tools.get("bandit") and files_by_language.get("python"):
+                future = executor.submit(self._run_bandit, files_by_language["python"], repo_path)
+                futures.append(("bandit", future))
+
+            # Run gosec on Go files
+            if self.available_tools.get("gosec") and files_by_language.get("go"):
+                future = executor.submit(self._run_gosec, files_by_language["go"], repo_path)
+                futures.append(("gosec", future))
+
+            # Collect results
+            for tool_name, future in futures:
+                try:
+                    findings = future.result(timeout=30)
+                    all_findings.extend(findings)
+                except Exception as e:
+                    logger.warning(f"Error running {tool_name}: {e}")
+
+        return all_findings
+
+    def _group_files_by_language(self, files: List[str]) -> Dict[str, List[str]]:
+        """Group files by programming language."""
+        groups = {}
+
+        language_extensions = {
+            "python": [".py"],
+            "go": [".go"],
+            "javascript": [".js", ".jsx", ".ts", ".tsx"],
+            "java": [".java"],
+            "ruby": [".rb"],
+        }
+
+        for file_path in files:
+            path = Path(file_path)
+            for language, extensions in language_extensions.items():
+                if path.suffix in extensions:
+                    if language not in groups:
+                        groups[language] = []
+                    groups[language].append(file_path)
+                    break
+
+        return groups
+
+    def _run_semgrep(self, files: List[str], repo_path: Path) -> List[Dict]:
+        """Run Semgrep security scanning."""
+        findings = []
+
+        if not files:
+            return findings
+
+        try:
+            # Create temporary file list for semgrep
+            with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
+                for file_path in files:
+                    f.write(f"{file_path}\n")
+                file_list_path = f.name
+
+            cmd = [
+                "semgrep",
+                "--config=auto",  # Use automatic rules
+                "--json",
+                "--no-error",
+                f"--include-list={file_list_path}",
+                str(repo_path),
+            ]
+
+            result = subprocess.run(cmd, capture_output=True, text=True, cwd=repo_path)
+
+            if result.returncode == 0 and result.stdout:
+                data = json.loads(result.stdout)
+                for finding in data.get("results", []):
+                    findings.append(
+                        {
+                            "type": "vulnerability",
+                            "vulnerability_type": finding.get("check_id", "unknown"),
+                            "severity": self._map_semgrep_severity(
+                                finding.get("extra", {}).get("severity")
+                            ),
+                            "file": Path(finding["path"]).relative_to(repo_path).as_posix(),
+                            "line": finding.get("start", {}).get("line", 0),
+                            "message": finding.get("extra", {}).get(
+                                "message", "Security issue detected"
+                            ),
+                            "tool": "semgrep",
+                            "confidence": "high",
+                        }
+                    )
+
+            # Clean up temp file
+            Path(file_list_path).unlink()
+
+        except Exception as e:
+            logger.warning(f"Error running Semgrep: {e}")
+
+        return findings
+
+    def _run_bandit(self, files: List[str], repo_path: Path) -> List[Dict]:
+        """Run Bandit for Python security scanning."""
+        findings = []
+
+        if not files:
+            return findings
+
+        try:
+            # Bandit expects full paths
+            full_paths = [str(repo_path / f) for f in files if (repo_path / f).exists()]
+
+            if not full_paths:
+                return findings
+
+            cmd = ["bandit", "-f", "json", "-ll", *full_paths]  # Low severity and higher
+
+            result = subprocess.run(cmd, capture_output=True, text=True)
+
+            if result.stdout:
+                data = json.loads(result.stdout)
+                for finding in data.get("results", []):
+                    findings.append(
+                        {
+                            "type": "vulnerability",
+                            "vulnerability_type": finding.get("test_id", "unknown"),
+                            "severity": finding.get("issue_severity", "medium").lower(),
+                            "file": Path(finding["filename"]).relative_to(repo_path).as_posix(),
+                            "line": finding.get("line_number", 0),
+                            "message": finding.get("issue_text", "Security issue detected"),
+                            "tool": "bandit",
+                            "confidence": finding.get("issue_confidence", "medium").lower(),
+                        }
+                    )
+
+        except Exception as e:
+            logger.warning(f"Error running Bandit: {e}")
+
+        return findings
+
+    def _run_gosec(self, files: List[str], repo_path: Path) -> List[Dict]:
+        """Run gosec for Go security scanning."""
+        findings = []
+
+        if not files:
+            return findings
+
+        try:
+            # gosec works on directories, so we scan the whole repo but filter results
+            cmd = ["gosec", "-fmt", "json", "./..."]
+
+            result = subprocess.run(cmd, capture_output=True, text=True, cwd=repo_path)
+
+            if result.stdout:
+                data = json.loads(result.stdout)
+                for finding in data.get("Issues", []):
+                    file_path = Path(finding["file"]).relative_to(repo_path).as_posix()
+
+                    # Only include findings for changed files
+                    if file_path in files:
+                        findings.append(
+                            {
+                                "type": "vulnerability",
+                                "vulnerability_type": finding.get("rule_id", "unknown"),
+                                "severity": self._map_gosec_severity(finding.get("severity")),
+                                "file": file_path,
+                                "line": int(finding.get("line", "0")),
+                                "message": finding.get("details", "Security issue detected"),
+                                "tool": "gosec",
+                                "confidence": finding.get("confidence", "medium").lower(),
+                            }
+                        )
+
+        except Exception as e:
+            logger.warning(f"Error running gosec: {e}")
+
+        return findings
+
+    def _get_vuln_severity(self, vuln_type: str) -> str:
+        """Map vulnerability type to severity."""
+        critical_types = ["sql_injection", "command_injection", "path_traversal"]
+        high_types = ["xss", "weak_crypto"]
+
+        if vuln_type in critical_types:
+            return "critical"
+        elif vuln_type in high_types:
+            return "high"
+        else:
+            return "medium"
+
+    def _map_semgrep_severity(self, severity: Optional[str]) -> str:
+        """Map Semgrep severity to our severity scale."""
+        if not severity:
+            return "medium"
+        severity = severity.upper()
+        if severity == "ERROR":
+            return "critical"
+        elif severity == "WARNING":
+            return "high"
+        else:
+            return "medium"
+
+    def _map_gosec_severity(self, severity: Optional[str]) -> str:
+        """Map gosec severity to our severity scale."""
+        if not severity:
+            return "medium"
+        severity = severity.upper()
+        if severity == "HIGH":
+            return "critical"
+        elif severity == "MEDIUM":
+            return "high"
+        else:
+            return "medium"
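For orientation, a minimal usage sketch of the new scanner follows. It is not taken from the package: the SimpleNamespace is a stand-in for the real security configuration (gitflow_analytics/security/config.py, whose schema is not shown in this hunk), and its attribute names simply mirror what __init__ and _detect_available_tools read above; the file path and regexes are placeholders.

# Hypothetical usage sketch (not from the package). The stand-in config only
# mimics the attributes VulnerabilityScanner reads in the hunk above.
from pathlib import Path
from types import SimpleNamespace

from gitflow_analytics.security.extractors.vulnerability_scanner import VulnerabilityScanner

config = SimpleNamespace(
    vulnerability_patterns={
        # name -> regex source; compiled by __init__ via re.compile()
        "command_injection": r"os\.system\(",
        "weak_crypto": r"\bmd5\b",
    },
    enable_semgrep=True,  # skipped (with an info log) if the semgrep CLI is not on PATH
    enable_bandit=True,
    enable_gosec=False,
)

scanner = VulnerabilityScanner(config)
findings = scanner.scan_files(["app/server.py"], repo_path=Path("/path/to/repo"))
for finding in findings:
    print(finding["severity"], finding["tool"], finding["file"], finding["line"], finding["message"])

Findings are plain dicts, so they can be filtered or serialized directly; tool-based scans only run for tools that are actually installed, while the regex pass always runs.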
gitflow_analytics/security/llm_analyzer.py (new file)

@@ -0,0 +1,347 @@
+"""LLM-based security analysis for comprehensive code review."""
+
+import json
+import logging
+import os
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+class LLMSecurityAnalyzer:
+    """Use LLM to analyze code changes for security issues that tools might miss."""
+
+    def __init__(self, config: Any, cache_dir: Optional[Path] = None):
+        """Initialize LLM security analyzer.
+
+        Args:
+            config: LLM security configuration
+            cache_dir: Directory for caching LLM responses
+        """
+        self.config = config
+        self.api_key = (
+            config.api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("ANTHROPIC_API_KEY")
+        )
+        self.model = config.model
+        self.cache_dir = cache_dir or Path(".gitflow-cache/llm_security")
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Cache LLM responses for 7 days to save costs
+        self.cache_ttl = timedelta(days=7)
+
+    def analyze_commit(self, commit_data: Dict) -> List[Dict]:
+        """Analyze a commit for security issues using LLM.
+
+        Args:
+            commit_data: Commit data with message, files_changed, etc.
+
+        Returns:
+            List of security findings
+        """
+        if not self.api_key:
+            logger.debug("LLM API key not configured, skipping LLM security analysis")
+            return []
+
+        findings = []
+
+        # Check cache first
+        cache_key = self._get_cache_key(commit_data)
+        cached_result = self._get_cached_result(cache_key)
+        if cached_result is not None:
+            return cached_result
+
+        try:
+            # Analyze commit message and metadata
+            commit_findings = self._analyze_commit_message(commit_data)
+            findings.extend(commit_findings)
+
+            # Analyze code changes if available
+            if "diff_content" in commit_data:
+                code_findings = self._analyze_code_changes(commit_data)
+                findings.extend(code_findings)
+
+            # Cache the results
+            self._cache_result(cache_key, findings)
+
+        except Exception as e:
+            logger.warning(f"Error in LLM security analysis: {e}")
+
+        return findings
+
+    def _analyze_commit_message(self, commit_data: Dict) -> List[Dict]:
+        """Analyze commit message for security implications."""
+        prompt = self.config.commit_review_prompt.format(
+            message=commit_data.get("message", ""),
+            files=", ".join(commit_data.get("files_changed", [])),
+            category=commit_data.get("category", "unknown"),
+        )
+
+        response = self._call_llm(prompt)
+        return self._parse_llm_response(response, commit_data)
+
+    def _analyze_code_changes(self, commit_data: Dict) -> List[Dict]:
+        """Analyze actual code changes for security issues."""
+        # Limit the amount of code sent to LLM for cost control
+        lines_added = commit_data.get("diff_content", "")
+        if len(lines_added.split("\n")) > self.config.max_lines_for_llm:
+            lines_added = "\n".join(lines_added.split("\n")[: self.config.max_lines_for_llm])
+            lines_added += "\n... (truncated for analysis)"
+
+        prompt = self.config.code_review_prompt.format(
+            files_changed=", ".join(commit_data.get("files_changed", [])), lines_added=lines_added
+        )
+
+        response = self._call_llm(prompt)
+        return self._parse_llm_response(response, commit_data, is_code_analysis=True)
+
+    def _call_llm(self, prompt: str) -> str:
+        """Call the LLM API with the given prompt."""
+        if self.model.startswith("claude"):
+            return self._call_anthropic(prompt)
+        else:
+            return self._call_openrouter(prompt)
+
+    def _call_anthropic(self, prompt: str) -> str:
+        """Call Anthropic's Claude API."""
+        try:
+            headers = {
+                "x-api-key": self.api_key,
+                "anthropic-version": "2023-06-01",
+                "content-type": "application/json",
+            }
+
+            data = {
+                "model": self.model,
+                "max_tokens": 500,
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.1,  # Low temperature for consistent analysis
+            }
+
+            with httpx.Client() as client:
+                response = client.post(
+                    "https://api.anthropic.com/v1/messages", headers=headers, json=data, timeout=30
+                )
+
+            if response.status_code == 200:
+                return response.json()["content"][0]["text"]
+            else:
+                logger.warning(f"Claude API error: {response.status_code}")
+                return ""
+
+        except Exception as e:
+            logger.warning(f"Error calling Claude API: {e}")
+            return ""
+
+    def _call_openrouter(self, prompt: str) -> str:
+        """Call OpenRouter API for various LLM models."""
+        try:
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json",
+            }
+
+            data = {
+                "model": self.model,
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are a security expert analyzing code for vulnerabilities. Be concise and specific.",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                "max_tokens": 500,
+                "temperature": 0.1,
+            }
+
+            with httpx.Client() as client:
+                response = client.post(
+                    "https://openrouter.ai/api/v1/chat/completions",
+                    headers=headers,
+                    json=data,
+                    timeout=30,
+                )
+
+            if response.status_code == 200:
+                return response.json()["choices"][0]["message"]["content"]
+            else:
+                logger.warning(f"OpenRouter API error: {response.status_code}")
+                return ""
+
+        except Exception as e:
+            logger.warning(f"Error calling OpenRouter API: {e}")
+            return ""
+
+    def _parse_llm_response(
+        self, response: str, commit_data: Dict, is_code_analysis: bool = False
+    ) -> List[Dict]:
+        """Parse LLM response and extract security findings."""
+        findings = []
+
+        if not response or "no security issues" in response.lower():
+            return findings
+
+        # Extract specific security concerns from the response
+        security_keywords = {
+            "authentication": ("high", "authentication"),
+            "authorization": ("high", "authorization"),
+            "injection": ("critical", "injection"),
+            "sql": ("critical", "sql_injection"),
+            "xss": ("high", "xss"),
+            "csrf": ("high", "csrf"),
+            "exposure": ("high", "data_exposure"),
+            "credential": ("critical", "credential_exposure"),
+            "secret": ("critical", "secret_exposure"),
+            "crypto": ("high", "weak_cryptography"),
+            "validation": ("medium", "input_validation"),
+            "sanitization": ("medium", "input_sanitization"),
+            "permission": ("high", "permission_issue"),
+            "privilege": ("high", "privilege_escalation"),
+            "buffer": ("critical", "buffer_overflow"),
+            "race": ("high", "race_condition"),
+            "session": ("high", "session_management"),
+            "cookie": ("medium", "cookie_security"),
+            "cors": ("medium", "cors_misconfiguration"),
+            "encryption": ("high", "encryption_issue"),
+        }
+
+        # Check for security keywords in the response
+        response_lower = response.lower()
+        found_issues = []
+
+        for keyword, (severity, issue_type) in security_keywords.items():
+            if keyword in response_lower:
+                found_issues.append((severity, issue_type))
+
+        # Create findings based on detected issues
+        if found_issues:
+            # Extract the most severe issue
+            severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
+            found_issues.sort(key=lambda x: severity_order.get(x[0], 999))
+
+            finding = {
+                "type": "security",
+                "source": "llm_analysis",
+                "vulnerability_type": found_issues[0][1],
+                "severity": found_issues[0][0],
+                "commit": commit_data.get("commit_hash_short", "unknown"),
+                "message": self._extract_finding_message(response),
+                "confidence": self._calculate_confidence(response),
+                "analysis_type": "code" if is_code_analysis else "commit",
+                "files": commit_data.get("files_changed", []),
+            }
+
+            findings.append(finding)
+
+        return findings
+
+    def _extract_finding_message(self, response: str) -> str:
+        """Extract a concise finding message from LLM response."""
+        # Take the first meaningful sentence
+        sentences = response.split(".")
+        for sentence in sentences:
+            sentence = sentence.strip()
+            if len(sentence) > 20 and not sentence.lower().startswith(("the", "this", "it")):
+                return sentence + "."
+
+        # Fallback to truncated response
+        return response[:200] + "..." if len(response) > 200 else response
+
+    def _calculate_confidence(self, response: str) -> str:
+        """Calculate confidence level based on LLM response characteristics."""
+        response_lower = response.lower()
+
+        # High confidence indicators
+        high_confidence_words = [
+            "definitely",
+            "clearly",
+            "certain",
+            "obvious",
+            "critical",
+            "severe",
+        ]
+        if any(word in response_lower for word in high_confidence_words):
+            return "high"
+
+        # Low confidence indicators
+        low_confidence_words = ["might", "could", "possibly", "perhaps", "may", "potential"]
+        if any(word in response_lower for word in low_confidence_words):
+            return "medium"
+
+        return "high" if len(response) > 100 else "medium"
+
+    def _get_cache_key(self, commit_data: Dict) -> str:
+        """Generate cache key for commit data."""
+        key_parts = [
+            commit_data.get("commit_hash", ""),
+            str(sorted(commit_data.get("files_changed", []))),
+            commit_data.get("message", "")[:100],
+        ]
+        key_str = "|".join(key_parts)
+        # Simple hash for filename
+        import hashlib
+
+        return hashlib.sha256(key_str.encode()).hexdigest()[:16]
+
+    def _get_cached_result(self, cache_key: str) -> Optional[List[Dict]]:
+        """Get cached result if it exists and is not expired."""
+        cache_file = self.cache_dir / f"{cache_key}.json"
+        if not cache_file.exists():
+            return None
+
+        try:
+            # Check if cache is expired
+            file_time = datetime.fromtimestamp(cache_file.stat().st_mtime)
+            if datetime.now() - file_time > self.cache_ttl:
+                cache_file.unlink()  # Delete expired cache
+                return None
+
+            with open(cache_file) as f:
+                return json.load(f)
+        except Exception as e:
+            logger.debug(f"Error reading cache: {e}")
+            return None
+
+    def _cache_result(self, cache_key: str, result: List[Dict]) -> None:
+        """Cache the analysis result."""
+        cache_file = self.cache_dir / f"{cache_key}.json"
+        try:
+            with open(cache_file, "w") as f:
+                json.dump(result, f)
+        except Exception as e:
+            logger.debug(f"Error writing cache: {e}")
+
+    def generate_security_insights(self, all_findings: List[Dict]) -> str:
+        """Generate high-level security insights from all findings."""
+        if not all_findings:
+            return "No security issues detected in the analyzed period."
+
+        # Aggregate findings
+        by_severity = {}
+        by_type = {}
+
+        for finding in all_findings:
+            severity = finding.get("severity", "unknown")
+            vuln_type = finding.get("vulnerability_type", "unknown")
+
+            by_severity[severity] = by_severity.get(severity, 0) + 1
+            by_type[vuln_type] = by_type.get(vuln_type, 0) + 1
+
+        # Generate insights prompt
+        prompt = f"""Analyze these security findings and provide strategic recommendations:
+
+Findings by severity: {json.dumps(by_severity, indent=2)}
+Findings by type: {json.dumps(by_type, indent=2)}
+
+Provide:
+1. Top 3 security risks to address
+2. Recommended security improvements
+3. Security training needs for the team
+
+Be concise and actionable."""
+
+        response = self._call_llm(prompt)
+        return response if response else "Unable to generate security insights."
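A similarly hedged sketch for the LLM analyzer above: the stand-in config only mirrors the attributes the class reads (api_key, model, max_lines_for_llm, and the two prompt templates); the project's real security config schema is not part of this hunk and may differ. With no API key configured or found in the environment, analyze_commit returns an empty list rather than failing.

# Hypothetical usage sketch (not from the package). Attribute names follow what
# LLMSecurityAnalyzer reads in the hunk above, not the real config schema.
from types import SimpleNamespace

from gitflow_analytics.security.llm_analyzer import LLMSecurityAnalyzer

config = SimpleNamespace(
    api_key=None,  # falls back to OPENROUTER_API_KEY or ANTHROPIC_API_KEY
    model="meta-llama/llama-3.1-8b-instruct",  # any OpenRouter id; "claude..." ids use the Anthropic API
    max_lines_for_llm=300,
    commit_review_prompt=(
        "Review this commit for security impact.\n"
        "Message: {message}\nFiles: {files}\nCategory: {category}"
    ),
    code_review_prompt=(
        "Review these changes for vulnerabilities.\n"
        "Files: {files_changed}\nDiff:\n{lines_added}"
    ),
)

analyzer = LLMSecurityAnalyzer(config)
findings = analyzer.analyze_commit(
    {
        "commit_hash": "0000000",  # placeholder commit data
        "commit_hash_short": "0000000",
        "message": "Add login endpoint",
        "category": "feature",
        "files_changed": ["api/auth.py"],
        "diff_content": "+password = request.args['password']",
    }
)
print(analyzer.generate_security_insights(findings))

Responses are cached on disk (keyed by commit hash, file list, and message prefix) for seven days, so re-running an analysis over the same period should not repeat the API calls.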