gitflow-analytics 1.3.11__py3-none-any.whl → 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/batch_classifier.py +156 -4
- gitflow_analytics/cli.py +803 -135
- gitflow_analytics/config/loader.py +39 -1
- gitflow_analytics/config/schema.py +1 -0
- gitflow_analytics/core/cache.py +20 -0
- gitflow_analytics/core/data_fetcher.py +1051 -117
- gitflow_analytics/core/git_auth.py +169 -0
- gitflow_analytics/core/git_timeout_wrapper.py +347 -0
- gitflow_analytics/core/metrics_storage.py +12 -3
- gitflow_analytics/core/progress.py +219 -18
- gitflow_analytics/core/subprocess_git.py +145 -0
- gitflow_analytics/extractors/ml_tickets.py +3 -2
- gitflow_analytics/extractors/tickets.py +93 -8
- gitflow_analytics/integrations/jira_integration.py +1 -1
- gitflow_analytics/integrations/orchestrator.py +47 -29
- gitflow_analytics/metrics/branch_health.py +3 -2
- gitflow_analytics/models/database.py +72 -1
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +12 -5
- gitflow_analytics/pm_framework/orchestrator.py +8 -3
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +24 -4
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +3 -1
- gitflow_analytics/qualitative/core/llm_fallback.py +34 -2
- gitflow_analytics/reports/narrative_writer.py +118 -74
- gitflow_analytics/security/__init__.py +11 -0
- gitflow_analytics/security/config.py +189 -0
- gitflow_analytics/security/extractors/__init__.py +7 -0
- gitflow_analytics/security/extractors/dependency_checker.py +379 -0
- gitflow_analytics/security/extractors/secret_detector.py +197 -0
- gitflow_analytics/security/extractors/vulnerability_scanner.py +333 -0
- gitflow_analytics/security/llm_analyzer.py +347 -0
- gitflow_analytics/security/reports/__init__.py +5 -0
- gitflow_analytics/security/reports/security_report.py +358 -0
- gitflow_analytics/security/security_analyzer.py +414 -0
- gitflow_analytics/tui/app.py +3 -1
- gitflow_analytics/tui/progress_adapter.py +313 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +407 -46
- gitflow_analytics/tui/screens/results_screen.py +219 -206
- gitflow_analytics/ui/__init__.py +21 -0
- gitflow_analytics/ui/progress_display.py +1477 -0
- gitflow_analytics/verify_activity.py +697 -0
- {gitflow_analytics-1.3.11.dist-info → gitflow_analytics-3.3.0.dist-info}/METADATA +2 -1
- {gitflow_analytics-1.3.11.dist-info → gitflow_analytics-3.3.0.dist-info}/RECORD +47 -31
- gitflow_analytics/cli_rich.py +0 -503
- {gitflow_analytics-1.3.11.dist-info → gitflow_analytics-3.3.0.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.3.11.dist-info → gitflow_analytics-3.3.0.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.3.11.dist-info → gitflow_analytics-3.3.0.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.3.11.dist-info → gitflow_analytics-3.3.0.dist-info}/top_level.txt +0 -0
gitflow_analytics/security/extractors/dependency_checker.py (new file)

@@ -0,0 +1,379 @@

"""Check for vulnerable dependencies in project files."""

import json
import logging
import re
from pathlib import Path
from typing import Any, Dict, List

import toml

logger = logging.getLogger(__name__)


class DependencyChecker:
    """Check for known vulnerabilities in project dependencies."""

    def __init__(self, config: Any):
        """Initialize dependency checker.

        Args:
            config: Dependency scanning configuration
        """
        self.config = config
        self.vulnerability_cache = {}

    def check_files(self, files_changed: List[str], repo_path: Path) -> List[Dict]:
        """Check dependency files for vulnerable packages.

        Args:
            files_changed: List of changed files
            repo_path: Repository path

        Returns:
            List of vulnerability findings
        """
        findings = []

        for file_path in files_changed:
            if self._is_dependency_file(file_path):
                full_path = repo_path / file_path
                if full_path.exists():
                    file_findings = self._check_dependency_file(full_path, file_path)
                    findings.extend(file_findings)

        return findings

    def _is_dependency_file(self, file_path: str) -> bool:
        """Check if file is a dependency specification file."""
        dependency_files = [
            "package.json",
            "package-lock.json",
            "yarn.lock",
            "requirements.txt",
            "Pipfile",
            "Pipfile.lock",
            "poetry.lock",
            "pyproject.toml",
            "go.mod",
            "go.sum",
            "Gemfile",
            "Gemfile.lock",
            "pom.xml",
            "build.gradle",
            "composer.json",
            "composer.lock",
        ]

        file_name = Path(file_path).name
        return file_name in dependency_files

    def _check_dependency_file(self, file_path: Path, relative_path: str) -> List[Dict]:
        """Check a specific dependency file for vulnerabilities."""
        findings = []
        file_name = file_path.name

        try:
            if file_name == "package.json" and self.config.check_npm:
                dependencies = self._parse_package_json(file_path)
                findings.extend(self._check_npm_dependencies(dependencies, relative_path))

            elif file_name == "requirements.txt" and self.config.check_pip:
                dependencies = self._parse_requirements_txt(file_path)
                findings.extend(self._check_python_dependencies(dependencies, relative_path))

            elif file_name == "pyproject.toml" and self.config.check_pip:
                dependencies = self._parse_pyproject_toml(file_path)
                findings.extend(self._check_python_dependencies(dependencies, relative_path))

            elif file_name == "go.mod" and self.config.check_go:
                dependencies = self._parse_go_mod(file_path)
                findings.extend(self._check_go_dependencies(dependencies, relative_path))

            elif file_name == "Gemfile" and self.config.check_ruby:
                dependencies = self._parse_gemfile(file_path)
                findings.extend(self._check_ruby_dependencies(dependencies, relative_path))

        except Exception as e:
            logger.warning(f"Error checking dependency file {relative_path}: {e}")

        return findings

    def _parse_package_json(self, file_path: Path) -> Dict[str, str]:
        """Parse package.json for dependencies."""
        dependencies = {}

        try:
            with open(file_path) as f:
                data = json.load(f)

            for dep_type in ["dependencies", "devDependencies", "peerDependencies"]:
                if dep_type in data:
                    for name, version_spec in data[dep_type].items():
                        # Clean version spec (remove ^, ~, etc.)
                        clean_version = re.sub(r"^[^\d]*", "", version_spec)
                        clean_version = clean_version.split(" ")[0]  # Handle version ranges
                        dependencies[name] = clean_version

        except Exception as e:
            logger.debug(f"Error parsing package.json: {e}")

        return dependencies

    def _parse_requirements_txt(self, file_path: Path) -> Dict[str, str]:
        """Parse requirements.txt for Python packages."""
        dependencies = {}

        try:
            with open(file_path) as f:
                for line in f:
                    line = line.strip()
                    if line and not line.startswith("#"):
                        # Parse package==version or package>=version
                        match = re.match(r"^([a-zA-Z0-9\-_]+)([=<>!]+)(.+)$", line)
                        if match:
                            name = match.group(1)
                            version_spec = match.group(3)
                            # Clean version
                            clean_version = version_spec.split(",")[0].strip()
                            dependencies[name.lower()] = clean_version

        except Exception as e:
            logger.debug(f"Error parsing requirements.txt: {e}")

        return dependencies

    def _parse_pyproject_toml(self, file_path: Path) -> Dict[str, str]:
        """Parse pyproject.toml for Python dependencies."""
        dependencies = {}

        try:
            with open(file_path) as f:
                data = toml.load(f)

            # Check different dependency sections
            sections = [
                ["project", "dependencies"],
                ["tool", "poetry", "dependencies"],
                ["tool", "poetry", "dev-dependencies"],
            ]

            for section_path in sections:
                section = data
                for key in section_path:
                    if key in section:
                        section = section[key]
                    else:
                        break
                else:
                    # Successfully navigated to the section
                    if isinstance(section, dict):
                        for name, spec in section.items():
                            if isinstance(spec, str):
                                # Simple version string
                                clean_version = re.sub(r"^[^\d]*", "", spec)
                                dependencies[name.lower()] = clean_version
                            elif isinstance(spec, dict) and "version" in spec:
                                # Poetry-style with version key
                                clean_version = re.sub(r"^[^\d]*", "", spec["version"])
                                dependencies[name.lower()] = clean_version

        except Exception as e:
            logger.debug(f"Error parsing pyproject.toml: {e}")

        return dependencies

    def _parse_go_mod(self, file_path: Path) -> Dict[str, str]:
        """Parse go.mod for Go dependencies."""
        dependencies = {}

        try:
            with open(file_path) as f:
                in_require_block = False
                for line in f:
                    line = line.strip()

                    if line.startswith("require ("):
                        in_require_block = True
                        continue
                    elif line == ")":
                        in_require_block = False
                        continue

                    if in_require_block or line.startswith("require "):
                        # Parse: module/name v1.2.3
                        parts = line.replace("require ", "").split()
                        if len(parts) >= 2 and parts[1].startswith("v"):
                            dependencies[parts[0]] = parts[1]

        except Exception as e:
            logger.debug(f"Error parsing go.mod: {e}")

        return dependencies

    def _parse_gemfile(self, file_path: Path) -> Dict[str, str]:
        """Parse Gemfile for Ruby dependencies."""
        dependencies = {}

        try:
            with open(file_path) as f:
                for line in f:
                    line = line.strip()
                    # Parse: gem 'name', '~> version'
                    match = re.match(r"gem\s+['\"]([^'\"]+)['\"](?:,\s*['\"]([^'\"]+)['\"])?", line)
                    if match:
                        name = match.group(1)
                        version_spec = match.group(2) if match.group(2) else "unknown"
                        clean_version = re.sub(r"^[^\d]*", "", version_spec)
                        dependencies[name] = clean_version

        except Exception as e:
            logger.debug(f"Error parsing Gemfile: {e}")

        return dependencies

    def _check_npm_dependencies(self, dependencies: Dict[str, str], file_path: str) -> List[Dict]:
        """Check NPM packages for vulnerabilities using GitHub Advisory Database."""
        findings = []

        for package_name, package_version in dependencies.items():
            vulnerabilities = self._query_vulnerability_db("npm", package_name, package_version)
            for vuln in vulnerabilities:
                findings.append(
                    {
                        "type": "dependency",
                        "vulnerability_type": "vulnerable_dependency",
                        "severity": vuln["severity"],
                        "package": package_name,
                        "version": package_version,
                        "file": file_path,
                        "cve": vuln.get("cve", ""),
                        "message": vuln.get(
                            "summary", f"Vulnerable {package_name}@{package_version}"
                        ),
                        "tool": "dependency_checker",
                        "confidence": "high",
                    }
                )

        return findings

    def _check_python_dependencies(
        self, dependencies: Dict[str, str], file_path: str
    ) -> List[Dict]:
        """Check Python packages for vulnerabilities."""
        findings = []

        for package_name, package_version in dependencies.items():
            vulnerabilities = self._query_vulnerability_db("pip", package_name, package_version)
            for vuln in vulnerabilities:
                findings.append(
                    {
                        "type": "dependency",
                        "vulnerability_type": "vulnerable_dependency",
                        "severity": vuln["severity"],
                        "package": package_name,
                        "version": package_version,
                        "file": file_path,
                        "cve": vuln.get("cve", ""),
                        "message": vuln.get(
                            "summary", f"Vulnerable {package_name}=={package_version}"
                        ),
                        "tool": "dependency_checker",
                        "confidence": "high",
                    }
                )

        return findings

    def _check_go_dependencies(self, dependencies: Dict[str, str], file_path: str) -> List[Dict]:
        """Check Go modules for vulnerabilities."""
        findings = []

        for module_name, module_version in dependencies.items():
            vulnerabilities = self._query_vulnerability_db("go", module_name, module_version)
            for vuln in vulnerabilities:
                findings.append(
                    {
                        "type": "dependency",
                        "vulnerability_type": "vulnerable_dependency",
                        "severity": vuln["severity"],
                        "package": module_name,
                        "version": module_version,
                        "file": file_path,
                        "cve": vuln.get("cve", ""),
                        "message": vuln.get(
                            "summary", f"Vulnerable {module_name}@{module_version}"
                        ),
                        "tool": "dependency_checker",
                        "confidence": "high",
                    }
                )

        return findings

    def _check_ruby_dependencies(self, dependencies: Dict[str, str], file_path: str) -> List[Dict]:
        """Check Ruby gems for vulnerabilities."""
        findings = []

        for gem_name, gem_version in dependencies.items():
            vulnerabilities = self._query_vulnerability_db("rubygems", gem_name, gem_version)
            for vuln in vulnerabilities:
                findings.append(
                    {
                        "type": "dependency",
                        "vulnerability_type": "vulnerable_dependency",
                        "severity": vuln["severity"],
                        "package": gem_name,
                        "version": gem_version,
                        "file": file_path,
                        "cve": vuln.get("cve", ""),
                        "message": vuln.get("summary", f"Vulnerable {gem_name} {gem_version}"),
                        "tool": "dependency_checker",
                        "confidence": "high",
                    }
                )

        return findings

    def _query_vulnerability_db(
        self, ecosystem: str, package: str, package_version: str
    ) -> List[Dict]:
        """Query vulnerability database for package vulnerabilities.

        This is a simplified implementation. In production, you would:
        1. Use the GitHub Advisory Database API
        2. Cache results to avoid rate limiting
        3. Handle version ranges properly
        """
        # Check cache first
        cache_key = f"{ecosystem}:{package}:{package_version}"
        if cache_key in self.vulnerability_cache:
            return self.vulnerability_cache[cache_key]

        vulnerabilities = []

        # In a real implementation, you would query:
        # https://api.github.com/advisories
        # or use tools like:
        # - safety (Python)
        # - npm audit (Node.js)
        # - bundler-audit (Ruby)
        # - nancy (Go)

        # For now, return empty list (no vulnerabilities found)
        # This is where you'd integrate with actual vulnerability databases

        # Example of what would be returned:
        # vulnerabilities = [{
        #     "severity": "high",
        #     "cve": "CVE-2021-12345",
        #     "summary": "Remote code execution vulnerability",
        #     "affected_versions": "< 1.2.3",
        #     "patched_versions": ">= 1.2.3"
        # }]

        # Cache the result
        self.vulnerability_cache[cache_key] = vulnerabilities

        return vulnerabilities
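
For orientation, here is a minimal usage sketch of the checker above. The SimpleNamespace config is a stand-in, not the package's real configuration object (that schema lives in gitflow_analytics/security/config.py, also added in this release); only the check_npm/check_pip/check_go/check_ruby flags that _check_dependency_file reads are assumed here.

# Hypothetical usage sketch; the config object is a stand-in for the real
# dependency-scanning config, providing only the check_* flags read above.
from pathlib import Path
from types import SimpleNamespace

from gitflow_analytics.security.extractors.dependency_checker import DependencyChecker

config = SimpleNamespace(check_npm=True, check_pip=True, check_go=True, check_ruby=True)
checker = DependencyChecker(config)

# Scan only the dependency manifests touched by a commit.
findings = checker.check_files(
    files_changed=["package.json", "requirements.txt"],
    repo_path=Path("/path/to/repo"),
)
for finding in findings:
    print(finding["severity"], finding["package"], finding["version"], finding["cve"])

Note that as shipped, _query_vulnerability_db returns an empty list until it is wired to a real advisory source, so findings will always be empty in practice; the dict layout each finding would use is visible in the _check_*_dependencies methods.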
gitflow_analytics/security/extractors/secret_detector.py (new file)

@@ -0,0 +1,197 @@

"""Secret detection in git commits."""

import logging
import math
import re
from typing import Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)


class SecretDetector:
    """Detect potential secrets and credentials in code changes."""

    def __init__(
        self,
        patterns: Dict[str, str],
        entropy_threshold: float = 4.5,
        exclude_paths: List[str] = None,
    ):
        """Initialize secret detector.

        Args:
            patterns: Dictionary of secret type to regex pattern
            entropy_threshold: Shannon entropy threshold for detecting high-entropy strings
            exclude_paths: List of glob patterns for paths to exclude
        """
        self.patterns = {name: re.compile(pattern) for name, pattern in patterns.items()}
        self.entropy_threshold = entropy_threshold
        self.exclude_paths = exclude_paths or []

        # Common false positive patterns to exclude
        self.false_positive_patterns = [
            re.compile(r"example\.com"),
            re.compile(r"localhost"),
            re.compile(r"127\.0\.0\.1"),
            re.compile(r"test|demo|sample|example", re.IGNORECASE),
            re.compile(r"xxx+|placeholder|your[_-]?api[_-]?key", re.IGNORECASE),
        ]

    def scan_text(self, text: str, file_path: Optional[str] = None) -> List[Dict]:
        """Scan text for potential secrets.

        Args:
            text: Text content to scan
            file_path: Optional file path for context

        Returns:
            List of detected secrets with metadata
        """
        findings = []

        # Skip if file should be excluded
        if file_path and self._should_exclude(file_path):
            return findings

        # Check against regex patterns
        for secret_type, pattern in self.patterns.items():
            for match in pattern.finditer(text):
                secret_value = match.group(0)

                # Skip false positives
                if self._is_false_positive(secret_value):
                    continue

                finding = {
                    "type": "secret",
                    "secret_type": secret_type,
                    "severity": self._get_severity(secret_type),
                    "file": file_path,
                    "line": text[: match.start()].count("\n") + 1,
                    "column": match.start() - text.rfind("\n", 0, match.start()),
                    "match": secret_value[:20] + "..." if len(secret_value) > 20 else secret_value,
                    "confidence": "high",
                }
                findings.append(finding)

        # Check for high-entropy strings (potential secrets)
        for line_num, line in enumerate(text.split("\n"), 1):
            high_entropy_strings = self._find_high_entropy_strings(line)
            for string, entropy in high_entropy_strings:
                if not self._is_false_positive(string):
                    finding = {
                        "type": "secret",
                        "secret_type": "high_entropy_string",
                        "severity": "medium",
                        "file": file_path,
                        "line": line_num,
                        "entropy": round(entropy, 2),
                        "match": string[:20] + "..." if len(string) > 20 else string,
                        "confidence": "medium",
                    }
                    findings.append(finding)

        return findings

    def scan_commit(self, commit_data: Dict) -> List[Dict]:
        """Scan a commit for secrets.

        Args:
            commit_data: Commit data dictionary with message, files_changed, etc.

        Returns:
            List of security findings
        """
        findings = []

        # Scan commit message
        message_findings = self.scan_text(commit_data.get("message", ""), "commit_message")
        findings.extend(message_findings)

        # For actual file content scanning, we'd need to read the files
        # This is a placeholder for integration with the git diff analysis
        # In practice, you'd get the actual diff content here

        return findings

    def _should_exclude(self, file_path: str) -> bool:
        """Check if file should be excluded from scanning."""
        from fnmatch import fnmatch

        for pattern in self.exclude_paths:
            if fnmatch(file_path, pattern):
                return True
        return False

    def _is_false_positive(self, value: str) -> bool:
        """Check if a detected secret is likely a false positive."""
        for pattern in self.false_positive_patterns:
            if pattern.search(value):
                return True
        return False

    def _get_severity(self, secret_type: str) -> str:
        """Determine severity based on secret type."""
        critical_types = ["private_key", "aws_secret_key", "db_url"]
        high_types = ["aws_access_key", "github_token", "api_key", "stripe_key"]

        if secret_type in critical_types:
            return "critical"
        elif secret_type in high_types:
            return "high"
        else:
            return "medium"

    def _calculate_entropy(self, string: str) -> float:
        """Calculate Shannon entropy of a string."""
        if not string:
            return 0.0

        # Count character frequencies
        char_counts = {}
        for char in string:
            char_counts[char] = char_counts.get(char, 0) + 1

        # Calculate entropy
        entropy = 0.0
        length = len(string)
        for count in char_counts.values():
            probability = count / length
            if probability > 0:
                entropy -= probability * math.log2(probability)

        return entropy

    def _find_high_entropy_strings(
        self, text: str, min_length: int = 20
    ) -> List[Tuple[str, float]]:
        """Find strings with high entropy (potential secrets).

        Args:
            text: Text to analyze
            min_length: Minimum string length to consider

        Returns:
            List of (string, entropy) tuples
        """
        high_entropy_strings = []

        # Look for quoted strings and continuous non-space sequences
        patterns = [
            r'"([^"]+)"',  # Double quoted strings
            r"'([^']+)'",  # Single quoted strings
            r"`([^`]+)`",  # Backtick strings
            r"=\s*([^\s;,]+)",  # Values after equals sign
            r':\s*"([^"]+)"',  # JSON-style values
            r":\s*\'([^\']+)\'",  # JSON-style values with single quotes
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, text):
                string = match.group(1)
                if len(string) >= min_length:
                    entropy = self._calculate_entropy(string)
                    if entropy >= self.entropy_threshold:
                        high_entropy_strings.append((string, entropy))

        return high_entropy_strings
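
And a similar sketch for the detector. The two regex patterns and the fake key below are illustrative assumptions, not the package's shipped defaults (those are defined in the security configuration, not in this module); the secret-type names match ones _get_severity already classifies.

# Hypothetical usage sketch; the patterns dict is illustrative, not the
# package's shipped default pattern set.
from gitflow_analytics.security.extractors.secret_detector import SecretDetector

patterns = {
    "aws_access_key": r"AKIA[0-9A-Z]{16}",  # assumed pattern, for demonstration
    "github_token": r"ghp_[A-Za-z0-9]{36}",  # assumed pattern, for demonstration
}
detector = SecretDetector(patterns, entropy_threshold=4.5, exclude_paths=["tests/*"])

# A fabricated, non-functional key: matched by the aws_access_key pattern and
# not caught by the built-in false-positive filters (no "example", "test", etc.).
findings = detector.scan_text('aws_key = "AKIAIOSFODNN7RQ4MQ2B"', file_path="config.py")
for finding in findings:
    print(finding["secret_type"], finding["severity"], finding["line"], finding["match"])

Beyond pattern matches, scan_text also flags any extracted string of 20 or more characters whose Shannon entropy, H = -sum(p_i * log2(p_i)) over its character frequencies p_i, reaches the 4.5-bit default threshold, reporting it as a medium-severity high_entropy_string finding.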