gitflow-analytics 1.3.6__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/batch_classifier.py +156 -4
  3. gitflow_analytics/cli.py +897 -179
  4. gitflow_analytics/config/loader.py +40 -1
  5. gitflow_analytics/config/schema.py +4 -0
  6. gitflow_analytics/core/cache.py +20 -0
  7. gitflow_analytics/core/data_fetcher.py +1254 -228
  8. gitflow_analytics/core/git_auth.py +169 -0
  9. gitflow_analytics/core/git_timeout_wrapper.py +347 -0
  10. gitflow_analytics/core/metrics_storage.py +12 -3
  11. gitflow_analytics/core/progress.py +219 -18
  12. gitflow_analytics/core/subprocess_git.py +145 -0
  13. gitflow_analytics/extractors/ml_tickets.py +3 -2
  14. gitflow_analytics/extractors/tickets.py +93 -8
  15. gitflow_analytics/integrations/jira_integration.py +1 -1
  16. gitflow_analytics/integrations/orchestrator.py +47 -29
  17. gitflow_analytics/metrics/branch_health.py +3 -2
  18. gitflow_analytics/models/database.py +72 -1
  19. gitflow_analytics/pm_framework/adapters/jira_adapter.py +12 -5
  20. gitflow_analytics/pm_framework/orchestrator.py +8 -3
  21. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +24 -4
  22. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +3 -1
  23. gitflow_analytics/qualitative/core/llm_fallback.py +34 -2
  24. gitflow_analytics/reports/narrative_writer.py +118 -74
  25. gitflow_analytics/security/__init__.py +11 -0
  26. gitflow_analytics/security/config.py +189 -0
  27. gitflow_analytics/security/extractors/__init__.py +7 -0
  28. gitflow_analytics/security/extractors/dependency_checker.py +379 -0
  29. gitflow_analytics/security/extractors/secret_detector.py +197 -0
  30. gitflow_analytics/security/extractors/vulnerability_scanner.py +333 -0
  31. gitflow_analytics/security/llm_analyzer.py +347 -0
  32. gitflow_analytics/security/reports/__init__.py +5 -0
  33. gitflow_analytics/security/reports/security_report.py +358 -0
  34. gitflow_analytics/security/security_analyzer.py +414 -0
  35. gitflow_analytics/tui/app.py +3 -1
  36. gitflow_analytics/tui/progress_adapter.py +313 -0
  37. gitflow_analytics/tui/screens/analysis_progress_screen.py +407 -46
  38. gitflow_analytics/tui/screens/results_screen.py +219 -206
  39. gitflow_analytics/ui/__init__.py +21 -0
  40. gitflow_analytics/ui/progress_display.py +1477 -0
  41. gitflow_analytics/verify_activity.py +697 -0
  42. {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/METADATA +2 -1
  43. {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/RECORD +47 -31
  44. gitflow_analytics/cli_rich.py +0 -503
  45. {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/WHEEL +0 -0
  46. {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/entry_points.txt +0 -0
  47. {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/licenses/LICENSE +0 -0
  48. {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,333 @@
1
+ """Vulnerability scanning using multiple security tools and LLM analysis."""
2
+
3
+ import json
4
+ import logging
5
+ import re
6
+ import shutil
7
+ import subprocess
8
+ import tempfile
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class VulnerabilityScanner:
    """Scan code for security vulnerabilities using tools and patterns."""

    def __init__(self, config: Any):
        """Initialize vulnerability scanner with configuration.

        Args:
            config: Security configuration exposing ``vulnerability_patterns``
                (name -> regex string) and per-tool ``enable_*`` flags.
        """
        self.config = config

        # Pre-compile the configured regexes once so per-file scans are cheap.
        compiled = {}
        for name, pattern in config.vulnerability_patterns.items():
            compiled[name] = re.compile(pattern)
        self.vulnerability_patterns = compiled

        # Probe PATH once for the optional external scanners.
        self.available_tools = self._detect_available_tools()
+ def scan_files(self, files_changed: List[str], repo_path: Path) -> List[Dict]:
30
+ """Scan changed files for vulnerabilities.
31
+
32
+ Args:
33
+ files_changed: List of changed file paths
34
+ repo_path: Path to repository
35
+
36
+ Returns:
37
+ List of vulnerability findings
38
+ """
39
+ findings = []
40
+
41
+ # Quick pattern-based scanning
42
+ pattern_findings = self._scan_with_patterns(files_changed, repo_path)
43
+ findings.extend(pattern_findings)
44
+
45
+ # Tool-based scanning (run in parallel for performance)
46
+ if self.available_tools:
47
+ tool_findings = self._scan_with_tools(files_changed, repo_path)
48
+ findings.extend(tool_findings)
49
+
50
+ return findings
51
+
52
+ def _detect_available_tools(self) -> Dict[str, bool]:
53
+ """Detect which security tools are installed."""
54
+ tools = {}
55
+
56
+ # Check for Semgrep
57
+ if self.config.enable_semgrep:
58
+ tools["semgrep"] = self._is_tool_available("semgrep")
59
+ if not tools["semgrep"]:
60
+ logger.info("Semgrep not found. Install with: pip install semgrep")
61
+
62
+ # Check for Bandit (Python)
63
+ if self.config.enable_bandit:
64
+ tools["bandit"] = self._is_tool_available("bandit")
65
+ if not tools["bandit"]:
66
+ logger.info("Bandit not found. Install with: pip install bandit")
67
+
68
+ # Check for gosec (Go)
69
+ if self.config.enable_gosec:
70
+ tools["gosec"] = self._is_tool_available("gosec")
71
+ if not tools["gosec"]:
72
+ logger.info("Gosec not found. Install from: https://github.com/securego/gosec")
73
+
74
+ return tools
75
+
76
+ def _is_tool_available(self, tool_name: str) -> bool:
77
+ """Check if a tool is available in PATH."""
78
+ return shutil.which(tool_name) is not None
79
+
80
+ def _scan_with_patterns(self, files_changed: List[str], repo_path: Path) -> List[Dict]:
81
+ """Quick pattern-based vulnerability detection."""
82
+ findings = []
83
+
84
+ for file_path in files_changed:
85
+ full_path = repo_path / file_path
86
+ if not full_path.exists() or not full_path.is_file():
87
+ continue
88
+
89
+ try:
90
+ content = full_path.read_text(encoding="utf-8", errors="ignore")
91
+
92
+ for vuln_type, pattern in self.vulnerability_patterns.items():
93
+ for match in pattern.finditer(content):
94
+ line_num = content[: match.start()].count("\n") + 1
95
+ finding = {
96
+ "type": "vulnerability",
97
+ "vulnerability_type": vuln_type,
98
+ "severity": self._get_vuln_severity(vuln_type),
99
+ "file": file_path,
100
+ "line": line_num,
101
+ "message": f"Potential {vuln_type.replace('_', ' ')} detected",
102
+ "tool": "pattern_matcher",
103
+ "confidence": "medium",
104
+ }
105
+ findings.append(finding)
106
+ except Exception as e:
107
+ logger.debug(f"Error scanning {file_path}: {e}")
108
+
109
+ return findings
110
+
111
+ def _scan_with_tools(self, files_changed: List[str], repo_path: Path) -> List[Dict]:
112
+ """Run security tools on changed files."""
113
+ all_findings = []
114
+
115
+ # Group files by language for efficient tool execution
116
+ files_by_language = self._group_files_by_language(files_changed)
117
+
118
+ with ThreadPoolExecutor(max_workers=4) as executor:
119
+ futures = []
120
+
121
+ # Run Semgrep if available (works on all languages)
122
+ if self.available_tools.get("semgrep"):
123
+ future = executor.submit(self._run_semgrep, files_changed, repo_path)
124
+ futures.append(("semgrep", future))
125
+
126
+ # Run Bandit on Python files
127
+ if self.available_tools.get("bandit") and files_by_language.get("python"):
128
+ future = executor.submit(self._run_bandit, files_by_language["python"], repo_path)
129
+ futures.append(("bandit", future))
130
+
131
+ # Run gosec on Go files
132
+ if self.available_tools.get("gosec") and files_by_language.get("go"):
133
+ future = executor.submit(self._run_gosec, files_by_language["go"], repo_path)
134
+ futures.append(("gosec", future))
135
+
136
+ # Collect results
137
+ for tool_name, future in futures:
138
+ try:
139
+ findings = future.result(timeout=30)
140
+ all_findings.extend(findings)
141
+ except Exception as e:
142
+ logger.warning(f"Error running {tool_name}: {e}")
143
+
144
+ return all_findings
145
+
146
+ def _group_files_by_language(self, files: List[str]) -> Dict[str, List[str]]:
147
+ """Group files by programming language."""
148
+ groups = {}
149
+
150
+ language_extensions = {
151
+ "python": [".py"],
152
+ "go": [".go"],
153
+ "javascript": [".js", ".jsx", ".ts", ".tsx"],
154
+ "java": [".java"],
155
+ "ruby": [".rb"],
156
+ }
157
+
158
+ for file_path in files:
159
+ path = Path(file_path)
160
+ for language, extensions in language_extensions.items():
161
+ if path.suffix in extensions:
162
+ if language not in groups:
163
+ groups[language] = []
164
+ groups[language].append(file_path)
165
+ break
166
+
167
+ return groups
168
+
169
+ def _run_semgrep(self, files: List[str], repo_path: Path) -> List[Dict]:
170
+ """Run Semgrep security scanning."""
171
+ findings = []
172
+
173
+ if not files:
174
+ return findings
175
+
176
+ try:
177
+ # Create temporary file list for semgrep
178
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
179
+ for file_path in files:
180
+ f.write(f"{file_path}\n")
181
+ file_list_path = f.name
182
+
183
+ cmd = [
184
+ "semgrep",
185
+ "--config=auto", # Use automatic rules
186
+ "--json",
187
+ "--no-error",
188
+ f"--include-list={file_list_path}",
189
+ str(repo_path),
190
+ ]
191
+
192
+ result = subprocess.run(cmd, capture_output=True, text=True, cwd=repo_path)
193
+
194
+ if result.returncode == 0 and result.stdout:
195
+ data = json.loads(result.stdout)
196
+ for finding in data.get("results", []):
197
+ findings.append(
198
+ {
199
+ "type": "vulnerability",
200
+ "vulnerability_type": finding.get("check_id", "unknown"),
201
+ "severity": self._map_semgrep_severity(
202
+ finding.get("extra", {}).get("severity")
203
+ ),
204
+ "file": Path(finding["path"]).relative_to(repo_path).as_posix(),
205
+ "line": finding.get("start", {}).get("line", 0),
206
+ "message": finding.get("extra", {}).get(
207
+ "message", "Security issue detected"
208
+ ),
209
+ "tool": "semgrep",
210
+ "confidence": "high",
211
+ }
212
+ )
213
+
214
+ # Clean up temp file
215
+ Path(file_list_path).unlink()
216
+
217
+ except Exception as e:
218
+ logger.warning(f"Error running Semgrep: {e}")
219
+
220
+ return findings
221
+
222
+ def _run_bandit(self, files: List[str], repo_path: Path) -> List[Dict]:
223
+ """Run Bandit for Python security scanning."""
224
+ findings = []
225
+
226
+ if not files:
227
+ return findings
228
+
229
+ try:
230
+ # Bandit expects full paths
231
+ full_paths = [str(repo_path / f) for f in files if (repo_path / f).exists()]
232
+
233
+ if not full_paths:
234
+ return findings
235
+
236
+ cmd = ["bandit", "-f", "json", "-ll", *full_paths] # Low severity and higher
237
+
238
+ result = subprocess.run(cmd, capture_output=True, text=True)
239
+
240
+ if result.stdout:
241
+ data = json.loads(result.stdout)
242
+ for finding in data.get("results", []):
243
+ findings.append(
244
+ {
245
+ "type": "vulnerability",
246
+ "vulnerability_type": finding.get("test_id", "unknown"),
247
+ "severity": finding.get("issue_severity", "medium").lower(),
248
+ "file": Path(finding["filename"]).relative_to(repo_path).as_posix(),
249
+ "line": finding.get("line_number", 0),
250
+ "message": finding.get("issue_text", "Security issue detected"),
251
+ "tool": "bandit",
252
+ "confidence": finding.get("issue_confidence", "medium").lower(),
253
+ }
254
+ )
255
+
256
+ except Exception as e:
257
+ logger.warning(f"Error running Bandit: {e}")
258
+
259
+ return findings
260
+
261
+ def _run_gosec(self, files: List[str], repo_path: Path) -> List[Dict]:
262
+ """Run gosec for Go security scanning."""
263
+ findings = []
264
+
265
+ if not files:
266
+ return findings
267
+
268
+ try:
269
+ # gosec works on directories, so we scan the whole repo but filter results
270
+ cmd = ["gosec", "-fmt", "json", "./..."]
271
+
272
+ result = subprocess.run(cmd, capture_output=True, text=True, cwd=repo_path)
273
+
274
+ if result.stdout:
275
+ data = json.loads(result.stdout)
276
+ for finding in data.get("Issues", []):
277
+ file_path = Path(finding["file"]).relative_to(repo_path).as_posix()
278
+
279
+ # Only include findings for changed files
280
+ if file_path in files:
281
+ findings.append(
282
+ {
283
+ "type": "vulnerability",
284
+ "vulnerability_type": finding.get("rule_id", "unknown"),
285
+ "severity": self._map_gosec_severity(finding.get("severity")),
286
+ "file": file_path,
287
+ "line": int(finding.get("line", "0")),
288
+ "message": finding.get("details", "Security issue detected"),
289
+ "tool": "gosec",
290
+ "confidence": finding.get("confidence", "medium").lower(),
291
+ }
292
+ )
293
+
294
+ except Exception as e:
295
+ logger.warning(f"Error running gosec: {e}")
296
+
297
+ return findings
298
+
299
+ def _get_vuln_severity(self, vuln_type: str) -> str:
300
+ """Map vulnerability type to severity."""
301
+ critical_types = ["sql_injection", "command_injection", "path_traversal"]
302
+ high_types = ["xss", "weak_crypto"]
303
+
304
+ if vuln_type in critical_types:
305
+ return "critical"
306
+ elif vuln_type in high_types:
307
+ return "high"
308
+ else:
309
+ return "medium"
310
+
311
+ def _map_semgrep_severity(self, severity: Optional[str]) -> str:
312
+ """Map Semgrep severity to our severity scale."""
313
+ if not severity:
314
+ return "medium"
315
+ severity = severity.upper()
316
+ if severity == "ERROR":
317
+ return "critical"
318
+ elif severity == "WARNING":
319
+ return "high"
320
+ else:
321
+ return "medium"
322
+
323
+ def _map_gosec_severity(self, severity: Optional[str]) -> str:
324
+ """Map gosec severity to our severity scale."""
325
+ if not severity:
326
+ return "medium"
327
+ severity = severity.upper()
328
+ if severity == "HIGH":
329
+ return "critical"
330
+ elif severity == "MEDIUM":
331
+ return "high"
332
+ else:
333
+ return "medium"
@@ -0,0 +1,347 @@
1
+ """LLM-based security analysis for comprehensive code review."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from datetime import datetime, timedelta
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ import httpx
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class LLMSecurityAnalyzer:
    """Use LLM to analyze code changes for security issues that tools might miss."""

    def __init__(self, config: Any, cache_dir: Optional[Path] = None):
        """Initialize LLM security analyzer.

        Args:
            config: LLM security configuration
            cache_dir: Directory for caching LLM responses
        """
        self.config = config

        # An explicitly configured key wins; otherwise fall back to the
        # well-known environment variables.
        self.api_key = (
            config.api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("ANTHROPIC_API_KEY")
        )
        self.model = config.model

        if cache_dir is None:
            cache_dir = Path(".gitflow-cache/llm_security")
        self.cache_dir = cache_dir
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Cache LLM responses for 7 days to save costs
        self.cache_ttl = timedelta(days=7)
+ def analyze_commit(self, commit_data: Dict) -> List[Dict]:
37
+ """Analyze a commit for security issues using LLM.
38
+
39
+ Args:
40
+ commit_data: Commit data with message, files_changed, etc.
41
+
42
+ Returns:
43
+ List of security findings
44
+ """
45
+ if not self.api_key:
46
+ logger.debug("LLM API key not configured, skipping LLM security analysis")
47
+ return []
48
+
49
+ findings = []
50
+
51
+ # Check cache first
52
+ cache_key = self._get_cache_key(commit_data)
53
+ cached_result = self._get_cached_result(cache_key)
54
+ if cached_result is not None:
55
+ return cached_result
56
+
57
+ try:
58
+ # Analyze commit message and metadata
59
+ commit_findings = self._analyze_commit_message(commit_data)
60
+ findings.extend(commit_findings)
61
+
62
+ # Analyze code changes if available
63
+ if "diff_content" in commit_data:
64
+ code_findings = self._analyze_code_changes(commit_data)
65
+ findings.extend(code_findings)
66
+
67
+ # Cache the results
68
+ self._cache_result(cache_key, findings)
69
+
70
+ except Exception as e:
71
+ logger.warning(f"Error in LLM security analysis: {e}")
72
+
73
+ return findings
74
+
75
+ def _analyze_commit_message(self, commit_data: Dict) -> List[Dict]:
76
+ """Analyze commit message for security implications."""
77
+ prompt = self.config.commit_review_prompt.format(
78
+ message=commit_data.get("message", ""),
79
+ files=", ".join(commit_data.get("files_changed", [])),
80
+ category=commit_data.get("category", "unknown"),
81
+ )
82
+
83
+ response = self._call_llm(prompt)
84
+ return self._parse_llm_response(response, commit_data)
85
+
86
+ def _analyze_code_changes(self, commit_data: Dict) -> List[Dict]:
87
+ """Analyze actual code changes for security issues."""
88
+ # Limit the amount of code sent to LLM for cost control
89
+ lines_added = commit_data.get("diff_content", "")
90
+ if len(lines_added.split("\n")) > self.config.max_lines_for_llm:
91
+ lines_added = "\n".join(lines_added.split("\n")[: self.config.max_lines_for_llm])
92
+ lines_added += "\n... (truncated for analysis)"
93
+
94
+ prompt = self.config.code_review_prompt.format(
95
+ files_changed=", ".join(commit_data.get("files_changed", [])), lines_added=lines_added
96
+ )
97
+
98
+ response = self._call_llm(prompt)
99
+ return self._parse_llm_response(response, commit_data, is_code_analysis=True)
100
+
101
+ def _call_llm(self, prompt: str) -> str:
102
+ """Call the LLM API with the given prompt."""
103
+ if self.model.startswith("claude"):
104
+ return self._call_anthropic(prompt)
105
+ else:
106
+ return self._call_openrouter(prompt)
107
+
108
+ def _call_anthropic(self, prompt: str) -> str:
109
+ """Call Anthropic's Claude API."""
110
+ try:
111
+ headers = {
112
+ "x-api-key": self.api_key,
113
+ "anthropic-version": "2023-06-01",
114
+ "content-type": "application/json",
115
+ }
116
+
117
+ data = {
118
+ "model": self.model,
119
+ "max_tokens": 500,
120
+ "messages": [{"role": "user", "content": prompt}],
121
+ "temperature": 0.1, # Low temperature for consistent analysis
122
+ }
123
+
124
+ with httpx.Client() as client:
125
+ response = client.post(
126
+ "https://api.anthropic.com/v1/messages", headers=headers, json=data, timeout=30
127
+ )
128
+
129
+ if response.status_code == 200:
130
+ return response.json()["content"][0]["text"]
131
+ else:
132
+ logger.warning(f"Claude API error: {response.status_code}")
133
+ return ""
134
+
135
+ except Exception as e:
136
+ logger.warning(f"Error calling Claude API: {e}")
137
+ return ""
138
+
139
+ def _call_openrouter(self, prompt: str) -> str:
140
+ """Call OpenRouter API for various LLM models."""
141
+ try:
142
+ headers = {
143
+ "Authorization": f"Bearer {self.api_key}",
144
+ "Content-Type": "application/json",
145
+ }
146
+
147
+ data = {
148
+ "model": self.model,
149
+ "messages": [
150
+ {
151
+ "role": "system",
152
+ "content": "You are a security expert analyzing code for vulnerabilities. Be concise and specific.",
153
+ },
154
+ {"role": "user", "content": prompt},
155
+ ],
156
+ "max_tokens": 500,
157
+ "temperature": 0.1,
158
+ }
159
+
160
+ with httpx.Client() as client:
161
+ response = client.post(
162
+ "https://openrouter.ai/api/v1/chat/completions",
163
+ headers=headers,
164
+ json=data,
165
+ timeout=30,
166
+ )
167
+
168
+ if response.status_code == 200:
169
+ return response.json()["choices"][0]["message"]["content"]
170
+ else:
171
+ logger.warning(f"OpenRouter API error: {response.status_code}")
172
+ return ""
173
+
174
+ except Exception as e:
175
+ logger.warning(f"Error calling OpenRouter API: {e}")
176
+ return ""
177
+
178
+ def _parse_llm_response(
179
+ self, response: str, commit_data: Dict, is_code_analysis: bool = False
180
+ ) -> List[Dict]:
181
+ """Parse LLM response and extract security findings."""
182
+ findings = []
183
+
184
+ if not response or "no security issues" in response.lower():
185
+ return findings
186
+
187
+ # Extract specific security concerns from the response
188
+ security_keywords = {
189
+ "authentication": ("high", "authentication"),
190
+ "authorization": ("high", "authorization"),
191
+ "injection": ("critical", "injection"),
192
+ "sql": ("critical", "sql_injection"),
193
+ "xss": ("high", "xss"),
194
+ "csrf": ("high", "csrf"),
195
+ "exposure": ("high", "data_exposure"),
196
+ "credential": ("critical", "credential_exposure"),
197
+ "secret": ("critical", "secret_exposure"),
198
+ "crypto": ("high", "weak_cryptography"),
199
+ "validation": ("medium", "input_validation"),
200
+ "sanitization": ("medium", "input_sanitization"),
201
+ "permission": ("high", "permission_issue"),
202
+ "privilege": ("high", "privilege_escalation"),
203
+ "buffer": ("critical", "buffer_overflow"),
204
+ "race": ("high", "race_condition"),
205
+ "session": ("high", "session_management"),
206
+ "cookie": ("medium", "cookie_security"),
207
+ "cors": ("medium", "cors_misconfiguration"),
208
+ "encryption": ("high", "encryption_issue"),
209
+ }
210
+
211
+ # Check for security keywords in the response
212
+ response_lower = response.lower()
213
+ found_issues = []
214
+
215
+ for keyword, (severity, issue_type) in security_keywords.items():
216
+ if keyword in response_lower:
217
+ found_issues.append((severity, issue_type))
218
+
219
+ # Create findings based on detected issues
220
+ if found_issues:
221
+ # Extract the most severe issue
222
+ severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
223
+ found_issues.sort(key=lambda x: severity_order.get(x[0], 999))
224
+
225
+ finding = {
226
+ "type": "security",
227
+ "source": "llm_analysis",
228
+ "vulnerability_type": found_issues[0][1],
229
+ "severity": found_issues[0][0],
230
+ "commit": commit_data.get("commit_hash_short", "unknown"),
231
+ "message": self._extract_finding_message(response),
232
+ "confidence": self._calculate_confidence(response),
233
+ "analysis_type": "code" if is_code_analysis else "commit",
234
+ "files": commit_data.get("files_changed", []),
235
+ }
236
+
237
+ findings.append(finding)
238
+
239
+ return findings
240
+
241
+ def _extract_finding_message(self, response: str) -> str:
242
+ """Extract a concise finding message from LLM response."""
243
+ # Take the first meaningful sentence
244
+ sentences = response.split(".")
245
+ for sentence in sentences:
246
+ sentence = sentence.strip()
247
+ if len(sentence) > 20 and not sentence.lower().startswith(("the", "this", "it")):
248
+ return sentence + "."
249
+
250
+ # Fallback to truncated response
251
+ return response[:200] + "..." if len(response) > 200 else response
252
+
253
+ def _calculate_confidence(self, response: str) -> str:
254
+ """Calculate confidence level based on LLM response characteristics."""
255
+ response_lower = response.lower()
256
+
257
+ # High confidence indicators
258
+ high_confidence_words = [
259
+ "definitely",
260
+ "clearly",
261
+ "certain",
262
+ "obvious",
263
+ "critical",
264
+ "severe",
265
+ ]
266
+ if any(word in response_lower for word in high_confidence_words):
267
+ return "high"
268
+
269
+ # Low confidence indicators
270
+ low_confidence_words = ["might", "could", "possibly", "perhaps", "may", "potential"]
271
+ if any(word in response_lower for word in low_confidence_words):
272
+ return "medium"
273
+
274
+ return "high" if len(response) > 100 else "medium"
275
+
276
+ def _get_cache_key(self, commit_data: Dict) -> str:
277
+ """Generate cache key for commit data."""
278
+ key_parts = [
279
+ commit_data.get("commit_hash", ""),
280
+ str(sorted(commit_data.get("files_changed", []))),
281
+ commit_data.get("message", "")[:100],
282
+ ]
283
+ key_str = "|".join(key_parts)
284
+ # Simple hash for filename
285
+ import hashlib
286
+
287
+ return hashlib.sha256(key_str.encode()).hexdigest()[:16]
288
+
289
+ def _get_cached_result(self, cache_key: str) -> Optional[List[Dict]]:
290
+ """Get cached result if it exists and is not expired."""
291
+ cache_file = self.cache_dir / f"{cache_key}.json"
292
+ if not cache_file.exists():
293
+ return None
294
+
295
+ try:
296
+ # Check if cache is expired
297
+ file_time = datetime.fromtimestamp(cache_file.stat().st_mtime)
298
+ if datetime.now() - file_time > self.cache_ttl:
299
+ cache_file.unlink() # Delete expired cache
300
+ return None
301
+
302
+ with open(cache_file) as f:
303
+ return json.load(f)
304
+ except Exception as e:
305
+ logger.debug(f"Error reading cache: {e}")
306
+ return None
307
+
308
+ def _cache_result(self, cache_key: str, result: List[Dict]) -> None:
309
+ """Cache the analysis result."""
310
+ cache_file = self.cache_dir / f"{cache_key}.json"
311
+ try:
312
+ with open(cache_file, "w") as f:
313
+ json.dump(result, f)
314
+ except Exception as e:
315
+ logger.debug(f"Error writing cache: {e}")
316
+
317
+ def generate_security_insights(self, all_findings: List[Dict]) -> str:
318
+ """Generate high-level security insights from all findings."""
319
+ if not all_findings:
320
+ return "No security issues detected in the analyzed period."
321
+
322
+ # Aggregate findings
323
+ by_severity = {}
324
+ by_type = {}
325
+
326
+ for finding in all_findings:
327
+ severity = finding.get("severity", "unknown")
328
+ vuln_type = finding.get("vulnerability_type", "unknown")
329
+
330
+ by_severity[severity] = by_severity.get(severity, 0) + 1
331
+ by_type[vuln_type] = by_type.get(vuln_type, 0) + 1
332
+
333
+ # Generate insights prompt
334
+ prompt = f"""Analyze these security findings and provide strategic recommendations:
335
+
336
+ Findings by severity: {json.dumps(by_severity, indent=2)}
337
+ Findings by type: {json.dumps(by_type, indent=2)}
338
+
339
+ Provide:
340
+ 1. Top 3 security risks to address
341
+ 2. Recommended security improvements
342
+ 3. Security training needs for the team
343
+
344
+ Be concise and actionable."""
345
+
346
+ response = self._call_llm(prompt)
347
+ return response if response else "Unable to generate security insights."
@@ -0,0 +1,5 @@
1
+ """Security reporting module."""
2
+
3
+ from .security_report import SecurityReportGenerator
4
+
5
+ __all__ = ["SecurityReportGenerator"]