codegraph-cli 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_cli/__init__.py +4 -0
- codegraph_cli/agents.py +191 -0
- codegraph_cli/bug_detector.py +386 -0
- codegraph_cli/chat_agent.py +352 -0
- codegraph_cli/chat_session.py +220 -0
- codegraph_cli/cli.py +330 -0
- codegraph_cli/cli_chat.py +367 -0
- codegraph_cli/cli_diagnose.py +133 -0
- codegraph_cli/cli_refactor.py +230 -0
- codegraph_cli/cli_setup.py +470 -0
- codegraph_cli/cli_test.py +177 -0
- codegraph_cli/cli_v2.py +267 -0
- codegraph_cli/codegen_agent.py +265 -0
- codegraph_cli/config.py +31 -0
- codegraph_cli/config_manager.py +341 -0
- codegraph_cli/context_manager.py +500 -0
- codegraph_cli/crew_agents.py +123 -0
- codegraph_cli/crew_chat.py +159 -0
- codegraph_cli/crew_tools.py +497 -0
- codegraph_cli/diff_engine.py +265 -0
- codegraph_cli/embeddings.py +241 -0
- codegraph_cli/graph_export.py +144 -0
- codegraph_cli/llm.py +642 -0
- codegraph_cli/models.py +47 -0
- codegraph_cli/models_v2.py +185 -0
- codegraph_cli/orchestrator.py +49 -0
- codegraph_cli/parser.py +800 -0
- codegraph_cli/performance_analyzer.py +223 -0
- codegraph_cli/project_context.py +230 -0
- codegraph_cli/rag.py +200 -0
- codegraph_cli/refactor_agent.py +452 -0
- codegraph_cli/security_scanner.py +366 -0
- codegraph_cli/storage.py +390 -0
- codegraph_cli/templates/graph_interactive.html +257 -0
- codegraph_cli/testgen_agent.py +316 -0
- codegraph_cli/validation_engine.py +285 -0
- codegraph_cli/vector_store.py +293 -0
- codegraph_cli-2.0.0.dist-info/METADATA +318 -0
- codegraph_cli-2.0.0.dist-info/RECORD +43 -0
- codegraph_cli-2.0.0.dist-info/WHEEL +5 -0
- codegraph_cli-2.0.0.dist-info/entry_points.txt +2 -0
- codegraph_cli-2.0.0.dist-info/licenses/LICENSE +21 -0
- codegraph_cli-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""Security vulnerability scanner."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
import re
|
|
7
|
+
from typing import Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from .storage import GraphStore
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SecurityScanner:
    """Scan code stored in a graph store for common security vulnerabilities.

    The scanner reads nodes from ``store.get_nodes()``. Each node is expected
    to be a mapping with at least the keys ``file_path``, ``node_type``,
    ``start_line`` and ``code`` (these are the only keys read here).

    Every reported issue is a dict with the keys ``type``, ``severity``,
    ``line``, ``message``, ``suggestion`` and ``code_snippet``; an
    ``auto_fix`` key is added when fixes are requested.
    """

    # Builtins that are only dangerous when the *real* builtin is called.
    # Matching them by attribute name would flag ``re.compile(...)`` or
    # ``model.eval()``, which have nothing to do with code execution.
    _BUILTIN_ONLY_FUNCTIONS = frozenset({"eval", "exec", "compile", "__import__"})

    # Method names whose first positional argument is treated as a SQL query.
    _SQL_EXECUTE_METHODS = frozenset({"execute", "executemany", "raw"})

    # Function names checked for an explicit ``shell=True`` keyword.
    _SUBPROCESS_FUNCTIONS = frozenset({"run", "call", "Popen"})

    # Calls whose first positional argument is a filesystem path.
    # ``read``/``write`` are deliberately absent: their first argument is
    # data (or a size), so ``f.write(f"...")`` must not be reported.
    _PATH_FUNCTIONS = frozenset({"open", "remove", "unlink", "rmdir"})

    # PyYAML loaders that do not construct arbitrary Python objects.
    _SAFE_YAML_LOADERS = frozenset(
        {"SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"}
    )

    # Substrings that mark a matched "secret" as an obvious placeholder.
    _PLACEHOLDER_MARKERS = (
        "your_key_here",
        "xxx",
        "***",
        "placeholder",
        "example",
        "test",
        "dummy",
    )

    # Remediation templates keyed by issue type (see ``_add_auto_fix``).
    _AUTO_FIXES = {
        "sql_injection": """# Use parameterized queries
cursor.execute(
    "SELECT * FROM users WHERE name = ?",
    (username,)
)

# Or for multiple parameters
cursor.execute(
    "SELECT * FROM users WHERE name = ? AND age = ?",
    (username, age)
)""",
        "command_injection": """# Use subprocess with shell=False
import subprocess

result = subprocess.run(
    ["command", "arg1", "arg2"],  # Pass as list
    shell=False,  # Never use shell=True with user input
    capture_output=True,
    text=True
)""",
        "code_injection": """# Avoid eval()/exec() on dynamic input
import ast

# For literal data (numbers, strings, lists, dicts) use literal_eval
value = ast.literal_eval(user_input)

# For choosing behaviour, map names to functions explicitly
handlers = {"start": start, "stop": stop}
handlers[action]()""",
        "hardcoded_secret": """# Use environment variables
import os

API_KEY = os.getenv("API_KEY")
if not API_KEY:
    raise ValueError("API_KEY environment variable not set")

# Or use python-dotenv
from dotenv import load_dotenv
load_dotenv()
API_KEY = os.getenv("API_KEY")""",
        "path_traversal": """# Validate and sanitize paths
from pathlib import Path

def safe_open_file(user_path: str, base_dir: str):
    # Resolve both paths so ".." segments and symlinks are collapsed
    base = Path(base_dir).resolve()
    full_path = (base / user_path).resolve()

    # Ensure it's within base_dir (compare path components, not string
    # prefixes: "/srv/data_evil" starts with "/srv/data" but is outside it)
    if not full_path.is_relative_to(base):
        raise ValueError("Path traversal detected")

    return open(full_path)""",
        "unsafe_deserialization": """# Use safe alternatives

# For YAML: use SafeLoader
import yaml
data = yaml.safe_load(file_content)
# or
data = yaml.load(file_content, Loader=yaml.SafeLoader)

# For pickle: validate source or use JSON instead
import json
data = json.loads(file_content)  # Safer alternative""",
    }

    def __init__(self, store: "GraphStore"):
        """Create a scanner bound to ``store``.

        Args:
            store: Object exposing ``get_nodes()``; it is queried on every scan.
        """
        self.store = store

        # Patterns for detecting hardcoded secrets: (regex, human label).
        # Group 1 of each regex captures the quoted value.
        self.secret_patterns = [
            (r'api[_-]?key\s*=\s*["\']([^"\']{10,})["\']', "API Key"),
            (r'password\s*=\s*["\']([^"\']+)["\']', "Password"),
            (r'secret\s*=\s*["\']([^"\']{10,})["\']', "Secret"),
            (r'token\s*=\s*["\']([^"\']{10,})["\']', "Token"),
            (r'aws_access_key_id\s*=\s*["\']([^"\']+)["\']', "AWS Access Key"),
            (r'private_key\s*=\s*["\']([^"\']+)["\']', "Private Key"),
        ]

        # Dangerous functions that could lead to injection: name -> issue type.
        self.dangerous_functions = {
            "eval": "code_injection",
            "exec": "code_injection",
            "compile": "code_injection",
            "__import__": "code_injection",
            "system": "command_injection",
            "popen": "command_injection",
            "spawn": "command_injection",
        }

    def scan_file(self, file_path: str, generate_fixes: bool = False) -> List[Dict]:
        """Scan file for security issues.

        Args:
            file_path: Path to file to scan
            generate_fixes: Whether to generate auto-fix suggestions

        Returns:
            List of security issue dictionaries, de-duplicated on
            ``(line, type)``.
        """
        # Local import keeps this block self-contained.
        import textwrap

        nodes = [n for n in self.store.get_nodes() if n["file_path"] == file_path]
        # Skip module-level node when function/class nodes exist to avoid dupes
        if any(n["node_type"] != "module" for n in nodes):
            nodes = [n for n in nodes if n["node_type"] != "module"]

        issues: List[Dict] = []
        seen = set()  # (line, type) dedup

        for node in nodes:
            try:
                # Snippets cut out of a class body keep their indentation and
                # would otherwise fail with IndentationError. dedent() keeps
                # the line count, so reported line numbers are unaffected.
                tree = ast.parse(textwrap.dedent(node["code"]))
            except (SyntaxError, ValueError):
                # ValueError: source containing NUL bytes on older Pythons.
                tree = None

            if tree is None:
                # The secret scan is regex-based and needs no AST, so it can
                # still run on code that does not parse.
                found = self._detect_hardcoded_secrets(node)
            else:
                found = (
                    self._detect_sql_injection(tree, node)
                    + self._detect_command_injection(tree, node)
                    + self._detect_hardcoded_secrets(node)
                    + self._detect_path_traversal(tree, node)
                    + self._detect_unsafe_deserialization(tree, node)
                )

            for issue in found:
                key = (issue["line"], issue["type"])
                if key not in seen:
                    seen.add(key)
                    issues.append(issue)

        # Add auto-fixes if requested
        if generate_fixes:
            issues = [self._add_auto_fix(issue) for issue in issues]

        return issues

    def _add_auto_fix(self, issue: Dict) -> Dict:
        """Add auto-fix suggestion to issue.

        The issue dict is modified in place and also returned. Issue types
        without a template are returned unchanged.
        """
        template = self._AUTO_FIXES.get(issue["type"])
        if template is not None:
            issue["auto_fix"] = template
        return issue

    @staticmethod
    def _make_issue(
        node: Dict,
        ast_node: ast.AST,
        issue_type: str,
        severity: str,
        message: str,
        suggestion: str,
    ) -> Dict:
        """Build an issue dict for ``ast_node`` found inside ``node``."""
        return {
            "type": issue_type,
            "severity": severity,
            # ast line numbers are 1-based relative to the snippet.
            "line": node["start_line"] + ast_node.lineno - 1,
            "message": message,
            "suggestion": suggestion,
            "code_snippet": ast.unparse(ast_node)[:100],
        }

    def _detect_sql_injection(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect SQL injection vulnerabilities.

        Flags ``.execute()``, ``.executemany()`` and ``.raw()`` calls whose
        first positional argument is built with ``+``, ``%``, an f-string or
        ``.format()``.
        """
        issues = []

        for call in ast.walk(tree):
            if not (isinstance(call, ast.Call) and isinstance(call.func, ast.Attribute)):
                continue
            if call.func.attr not in self._SQL_EXECUTE_METHODS or not call.args:
                continue

            query = call.args[0]
            finding = None

            if isinstance(query, ast.BinOp) and isinstance(query.op, ast.Add):
                finding = (
                    "Potential SQL injection via string concatenation",
                    "Use parameterized queries with placeholders (?)",
                )
            elif isinstance(query, ast.BinOp) and isinstance(query.op, ast.Mod):
                # "... %s" % value interpolates before the driver sees the query.
                finding = (
                    "Potential SQL injection via % string formatting",
                    "Use parameterized queries with placeholders",
                )
            elif isinstance(query, ast.JoinedStr):
                finding = (
                    "Potential SQL injection via f-string",
                    "Use parameterized queries instead of f-strings",
                )
            elif (
                isinstance(query, ast.Call)
                and isinstance(query.func, ast.Attribute)
                and query.func.attr == "format"
            ):
                finding = (
                    "Potential SQL injection via .format()",
                    "Use parameterized queries with placeholders",
                )

            if finding is not None:
                message, suggestion = finding
                issues.append(
                    self._make_issue(
                        node, call, "sql_injection", "critical", message, suggestion
                    )
                )

        return issues

    def _is_dangerous_call(self, func: ast.AST, func_name: str) -> bool:
        """Return True when calling ``func`` should be reported as dangerous."""
        if func_name not in self.dangerous_functions:
            return False
        if func_name not in self._BUILTIN_ONLY_FUNCTIONS:
            return True
        if isinstance(func, ast.Name):
            return True
        # Attribute access: only ``builtins.eval(...)`` and friends count.
        return isinstance(func.value, ast.Name) and func.value.id in (
            "builtins",
            "__builtins__",
        )

    def _detect_command_injection(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect command injection and dynamic code execution risks."""
        issues = []

        for call in ast.walk(tree):
            if not isinstance(call, ast.Call):
                continue

            func = call.func
            if isinstance(func, ast.Name):
                func_name = func.id
            elif isinstance(func, ast.Attribute):
                func_name = func.attr
            else:
                continue

            if self._is_dangerous_call(func, func_name):
                issue_type = self.dangerous_functions[func_name]
                if issue_type == "code_injection":
                    suggestion = (
                        "Avoid executing dynamically built code; use "
                        "ast.literal_eval() for data or an explicit dispatch table"
                    )
                else:
                    suggestion = (
                        "Use subprocess.run() with shell=False and validate inputs"
                    )
                issues.append(
                    self._make_issue(
                        node,
                        call,
                        issue_type,
                        "critical",
                        f"Unsafe use of '{func_name}()' with potential user input",
                        suggestion,
                    )
                )

            # Check for subprocess with shell=True
            if func_name in self._SUBPROCESS_FUNCTIONS:
                for keyword in call.keywords:
                    if (
                        keyword.arg == "shell"
                        and isinstance(keyword.value, ast.Constant)
                        and keyword.value.value is True
                    ):
                        issues.append(
                            self._make_issue(
                                node,
                                call,
                                "command_injection",
                                "high",
                                "subprocess called with shell=True",
                                "Use shell=False and pass command as list",
                            )
                        )

        return issues

    def _detect_hardcoded_secrets(self, node: Dict) -> List[Dict]:
        """Detect hardcoded secrets in code using ``self.secret_patterns``."""
        issues = []
        code = node["code"]

        for pattern, secret_type in self.secret_patterns:
            for match in re.finditer(pattern, code, re.IGNORECASE):
                value = match.group(1)

                # Skip if it looks like a placeholder
                lowered = value.lower()
                if any(marker in lowered for marker in self._PLACEHOLDER_MARKERS):
                    continue

                # Skip very short values (likely not real secrets)
                if len(value) < 8:
                    continue

                issues.append({
                    "type": "hardcoded_secret",
                    "severity": "high",
                    # Newlines before the match give a 0-based line offset.
                    "line": node["start_line"] + code[:match.start()].count('\n'),
                    "message": f"Hardcoded {secret_type} found",
                    "suggestion": "Use environment variables or secret management (e.g., os.getenv())",
                    # The secret value itself is never echoed back.
                    "code_snippet": f"{secret_type.lower()}=***",
                })

        return issues

    def _detect_path_traversal(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect path traversal vulnerabilities.

        Flags path-taking calls (``open``, ``remove``, ``unlink``, ``rmdir``)
        whose first positional argument is a binary expression or an f-string.
        """
        issues = []

        for call in ast.walk(tree):
            if not isinstance(call, ast.Call):
                continue

            func = call.func
            if isinstance(func, ast.Name):
                func_name = func.id
            elif isinstance(func, ast.Attribute):
                func_name = func.attr
            else:
                continue

            if func_name not in self._PATH_FUNCTIONS or not call.args:
                continue

            # A dynamically assembled path is the heuristic for user input.
            if isinstance(call.args[0], (ast.BinOp, ast.JoinedStr)):
                issues.append(
                    self._make_issue(
                        node,
                        call,
                        "path_traversal",
                        "medium",
                        "Potential path traversal if path comes from user input",
                        "Validate and sanitize file paths, use Path.resolve() and check against allowed directories",
                    )
                )

        return issues

    def _uses_safe_yaml_loader(self, call: ast.Call) -> bool:
        """Return True when a ``yaml.load`` call names a safe loader.

        The loader may be given as ``Loader=...`` or as the second positional
        argument, and written as ``yaml.SafeLoader`` or a bare ``SafeLoader``.
        """
        candidates = [kw.value for kw in call.keywords if kw.arg == "Loader"]
        if len(call.args) >= 2:
            candidates.append(call.args[1])

        for loader in candidates:
            if isinstance(loader, ast.Attribute):
                loader_name = loader.attr
            elif isinstance(loader, ast.Name):
                loader_name = loader.id
            else:
                continue
            if loader_name in self._SAFE_YAML_LOADERS:
                return True
        return False

    def _detect_unsafe_deserialization(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect unsafe deserialization (pickle, yaml)."""
        issues = []

        for call in ast.walk(tree):
            if not isinstance(call, ast.Call):
                continue
            func = call.func
            # Only ``<name>.<attr>(...)`` calls are inspected.
            if not (isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name)):
                continue

            module_name = func.value.id

            # Check for pickle.loads, pickle.load
            if module_name == "pickle" and func.attr in ("loads", "load"):
                issues.append(
                    self._make_issue(
                        node,
                        call,
                        "unsafe_deserialization",
                        "high",
                        "Unsafe deserialization with pickle on untrusted data",
                        "Use JSON or validate data source before unpickling",
                    )
                )

            # Check for yaml.load without safe loader
            if (
                module_name == "yaml"
                and func.attr == "load"
                and not self._uses_safe_yaml_loader(call)
            ):
                issues.append(
                    self._make_issue(
                        node,
                        call,
                        "unsafe_deserialization",
                        "high",
                        "yaml.load() without SafeLoader",
                        "Use yaml.safe_load() or yaml.load(data, Loader=yaml.SafeLoader)",
                    )
                )

        return issues

    def scan_project(self) -> Dict[str, List[Dict]]:
        """Scan entire project for security issues.

        Returns:
            Dictionary mapping file paths to lists of security issues. Files
            without issues are omitted; files appear in the order the store
            first yields them.
        """
        results = {}

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result order is stable between runs (a set would not guarantee it).
        file_paths = dict.fromkeys(
            node["file_path"] for node in self.store.get_nodes()
        )

        for file_path in file_paths:
            issues = self.scan_file(file_path)
            if issues:
                results[file_path] = issues

        return results
|