codegraph-cli 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. codegraph_cli/__init__.py +4 -0
  2. codegraph_cli/agents.py +191 -0
  3. codegraph_cli/bug_detector.py +386 -0
  4. codegraph_cli/chat_agent.py +352 -0
  5. codegraph_cli/chat_session.py +220 -0
  6. codegraph_cli/cli.py +330 -0
  7. codegraph_cli/cli_chat.py +367 -0
  8. codegraph_cli/cli_diagnose.py +133 -0
  9. codegraph_cli/cli_refactor.py +230 -0
  10. codegraph_cli/cli_setup.py +470 -0
  11. codegraph_cli/cli_test.py +177 -0
  12. codegraph_cli/cli_v2.py +267 -0
  13. codegraph_cli/codegen_agent.py +265 -0
  14. codegraph_cli/config.py +31 -0
  15. codegraph_cli/config_manager.py +341 -0
  16. codegraph_cli/context_manager.py +500 -0
  17. codegraph_cli/crew_agents.py +123 -0
  18. codegraph_cli/crew_chat.py +159 -0
  19. codegraph_cli/crew_tools.py +497 -0
  20. codegraph_cli/diff_engine.py +265 -0
  21. codegraph_cli/embeddings.py +241 -0
  22. codegraph_cli/graph_export.py +144 -0
  23. codegraph_cli/llm.py +642 -0
  24. codegraph_cli/models.py +47 -0
  25. codegraph_cli/models_v2.py +185 -0
  26. codegraph_cli/orchestrator.py +49 -0
  27. codegraph_cli/parser.py +800 -0
  28. codegraph_cli/performance_analyzer.py +223 -0
  29. codegraph_cli/project_context.py +230 -0
  30. codegraph_cli/rag.py +200 -0
  31. codegraph_cli/refactor_agent.py +452 -0
  32. codegraph_cli/security_scanner.py +366 -0
  33. codegraph_cli/storage.py +390 -0
  34. codegraph_cli/templates/graph_interactive.html +257 -0
  35. codegraph_cli/testgen_agent.py +316 -0
  36. codegraph_cli/validation_engine.py +285 -0
  37. codegraph_cli/vector_store.py +293 -0
  38. codegraph_cli-2.0.0.dist-info/METADATA +318 -0
  39. codegraph_cli-2.0.0.dist-info/RECORD +43 -0
  40. codegraph_cli-2.0.0.dist-info/WHEEL +5 -0
  41. codegraph_cli-2.0.0.dist-info/entry_points.txt +2 -0
  42. codegraph_cli-2.0.0.dist-info/licenses/LICENSE +21 -0
  43. codegraph_cli-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,366 @@
1
+ """Security vulnerability scanner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import re
7
+ from typing import Dict, List, Optional
8
+
9
+ from .storage import GraphStore
10
+
11
+
12
class SecurityScanner:
    """Scan code held in a graph store for common security vulnerabilities.

    Every node returned by ``store.get_nodes()`` is read as a mapping with the
    keys ``file_path``, ``node_type``, ``start_line`` and ``code``.  Findings
    are plain dictionaries with the keys ``type``, ``severity``, ``line``,
    ``message``, ``suggestion`` and ``code_snippet`` (plus ``auto_fix`` when
    fix suggestions are requested).
    """

    # Substrings that mark a matched "secret" as an obvious placeholder.
    _PLACEHOLDER_MARKERS = (
        "your_key_here", "xxx", "***", "placeholder", "example", "test", "dummy",
    )

    # Method names whose first argument is treated as a SQL statement.
    _SQL_EXEC_METHODS = frozenset({"execute", "executemany", "raw"})

    # Call names that accept a ``shell=`` keyword (subprocess-style APIs).
    _SHELL_CAPABLE_CALLS = frozenset(
        {"run", "call", "Popen", "check_call", "check_output"}
    )

    # Calls whose first positional argument is a filesystem path.  ``read`` and
    # ``write`` are deliberately absent: their first argument is a size or the
    # payload, so ``fh.write(a + b)`` is not a path-traversal signal.
    _PATH_FUNCTIONS = frozenset({"open", "remove", "unlink", "rmdir"})

    # PyYAML loaders that do not construct arbitrary Python objects.
    _SAFE_YAML_LOADERS = frozenset(
        {"SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"}
    )

    # Canned remediation snippets, keyed by issue type.
    _AUTO_FIXES = {
        "sql_injection": """# Use parameterized queries
cursor.execute(
    "SELECT * FROM users WHERE name = ?",
    (username,)
)

# Or for multiple parameters
cursor.execute(
    "SELECT * FROM users WHERE name = ? AND age = ?",
    (username, age)
)""",
        "command_injection": """# Use subprocess with shell=False
import subprocess

result = subprocess.run(
    ["command", "arg1", "arg2"],  # Pass as list
    shell=False,  # Never use shell=True with user input
    capture_output=True,
    text=True
)""",
        "code_injection": """# Avoid eval()/exec() on dynamic input
import ast

# For Python literals (numbers, strings, lists, dicts): use literal_eval
value = ast.literal_eval(user_input)

# For dynamic dispatch: map allowed names to callables explicitly
handlers = {"start": start, "stop": stop}
handlers[action]()""",
        "hardcoded_secret": """# Use environment variables
import os

API_KEY = os.getenv("API_KEY")
if not API_KEY:
    raise ValueError("API_KEY environment variable not set")

# Or use python-dotenv
from dotenv import load_dotenv
load_dotenv()
API_KEY = os.getenv("API_KEY")""",
        "path_traversal": """# Validate and sanitize paths
from pathlib import Path

def safe_open_file(user_path: str, base_dir: str):
    # Resolve both paths to absolute form
    base = Path(base_dir).resolve()
    full_path = (base / user_path).resolve()

    # Ensure it's within base_dir (a plain string prefix comparison would
    # wrongly accept sibling directories such as "/data_evil" for "/data")
    if not full_path.is_relative_to(base):
        raise ValueError("Path traversal detected")

    return open(full_path)""",
        "unsafe_deserialization": """# Use safe alternatives

# For YAML: use SafeLoader
import yaml
data = yaml.safe_load(file_content)
# or
data = yaml.load(file_content, Loader=yaml.SafeLoader)

# For pickle: validate source or use JSON instead
import json
data = json.loads(file_content)  # Safer alternative""",
    }

    def __init__(self, store: "GraphStore"):
        """Create a scanner that reads code nodes from ``store``.

        Args:
            store: Object exposing ``get_nodes()``; see the class docstring
                for the node keys that are read.
        """
        self.store = store

        # Patterns for detecting hardcoded secrets: (regex, human-readable kind).
        # Group 1 of each regex captures the literal value.
        self.secret_patterns = [
            (r'api[_-]?key\s*=\s*["\']([^"\']{10,})["\']', "API Key"),
            (r'password\s*=\s*["\']([^"\']+)["\']', "Password"),
            (r'secret\s*=\s*["\']([^"\']{10,})["\']', "Secret"),
            (r'token\s*=\s*["\']([^"\']{10,})["\']', "Token"),
            (r'aws_access_key_id\s*=\s*["\']([^"\']+)["\']', "AWS Access Key"),
            (r'private_key\s*=\s*["\']([^"\']+)["\']', "Private Key"),
        ]

        # Dangerous functions that could lead to injection: name -> issue type.
        self.dangerous_functions = {
            "eval": "code_injection",
            "exec": "code_injection",
            "compile": "code_injection",
            "__import__": "code_injection",
            "system": "command_injection",
            "popen": "command_injection",
            "spawn": "command_injection",
        }

    # ------------------------------------------------------------------ #
    # Public API
    # ------------------------------------------------------------------ #

    def scan_file(self, file_path: str, generate_fixes: bool = False) -> List[Dict]:
        """Scan file for security issues.

        Args:
            file_path: Path to file to scan (compared with each node's
                ``file_path`` value).
            generate_fixes: Whether to generate auto-fix suggestions

        Returns:
            List of security issue dictionaries
        """
        nodes = [n for n in self.store.get_nodes() if n["file_path"] == file_path]
        return self._scan_nodes(nodes, generate_fixes)

    def scan_project(self) -> Dict[str, List[Dict]]:
        """Scan entire project for security issues.

        Returns:
            Dictionary mapping file paths to lists of security issues.  Files
            without findings are omitted.
        """
        # Group nodes per file from a single store read instead of re-reading
        # and re-filtering the whole node list once per file.
        nodes_by_file: Dict[str, List[Dict]] = {}
        for node in self.store.get_nodes():
            nodes_by_file.setdefault(node["file_path"], []).append(node)

        results: Dict[str, List[Dict]] = {}
        for file_path, nodes in nodes_by_file.items():
            issues = self._scan_nodes(nodes, generate_fixes=False)
            if issues:
                results[file_path] = issues
        return results

    # ------------------------------------------------------------------ #
    # Scan driver and shared helpers
    # ------------------------------------------------------------------ #

    def _scan_nodes(self, nodes: List[Dict], generate_fixes: bool) -> List[Dict]:
        """Run every detector over ``nodes`` (all belonging to one file)."""
        # Skip module-level node when function/class nodes exist to avoid dupes
        if any(n["node_type"] != "module" for n in nodes):
            nodes = [n for n in nodes if n["node_type"] != "module"]

        issues: List[Dict] = []
        seen: set = set()  # (line, type) dedup

        for node in nodes:
            try:
                tree: Optional[ast.AST] = ast.parse(node["code"])
            except (SyntaxError, ValueError):
                # Unparsable code cannot be analysed structurally, but the
                # regex-based secret scan does not need an AST.
                tree = None

            if tree is None:
                found = self._detect_hardcoded_secrets(node)
            else:
                found = (
                    self._detect_sql_injection(tree, node)
                    + self._detect_command_injection(tree, node)
                    + self._detect_hardcoded_secrets(node)
                    + self._detect_path_traversal(tree, node)
                    + self._detect_unsafe_deserialization(tree, node)
                )

            for issue in found:
                key = (issue["line"], issue["type"])
                if key not in seen:
                    seen.add(key)
                    issues.append(issue)

        # Add auto-fixes if requested
        if generate_fixes:
            issues = [self._add_auto_fix(issue) for issue in issues]

        return issues

    def _add_auto_fix(self, issue: Dict) -> Dict:
        """Add auto-fix suggestion to issue.

        The issue is modified in place and also returned.  Issue types without
        a canned fix are returned unchanged.
        """
        fix = self._AUTO_FIXES.get(issue["type"])
        if fix is not None:
            issue["auto_fix"] = fix
        return issue

    @staticmethod
    def _call_name(call: ast.Call) -> Optional[str]:
        """Return the called name (``f`` for ``f()``, ``m`` for ``x.m()``)."""
        func = call.func
        if isinstance(func, ast.Name):
            return func.id
        if isinstance(func, ast.Attribute):
            return func.attr
        return None

    @staticmethod
    def _is_builtin_call(call: ast.Call) -> bool:
        """Return True for ``name(...)`` or ``builtins.name(...)`` calls."""
        func = call.func
        if isinstance(func, ast.Name):
            return True
        return (
            isinstance(func, ast.Attribute)
            and isinstance(func.value, ast.Name)
            and func.value.id == "builtins"
        )

    @staticmethod
    def _make_issue(
        node: Dict,
        ast_node: ast.AST,
        issue_type: str,
        severity: str,
        message: str,
        suggestion: str,
    ) -> Dict:
        """Build an issue dictionary for an AST node found inside ``node``."""
        return {
            "type": issue_type,
            "severity": severity,
            # AST line numbers are relative to the node's own code snippet.
            "line": node["start_line"] + ast_node.lineno - 1,
            "message": message,
            "suggestion": suggestion,
            "code_snippet": ast.unparse(ast_node)[:100],
        }

    # ------------------------------------------------------------------ #
    # Detectors
    # ------------------------------------------------------------------ #

    def _detect_sql_injection(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect SQL injection vulnerabilities.

        Flags ``.execute()``, ``.executemany()`` and ``.raw()`` calls whose
        first argument is built with ``+``, ``%``, an f-string or ``.format()``.
        """
        issues = []

        for call in ast.walk(tree):
            if not isinstance(call, ast.Call):
                continue
            func = call.func
            if not (isinstance(func, ast.Attribute) and func.attr in self._SQL_EXEC_METHODS):
                continue
            if not call.args:
                continue

            query = call.args[0]
            finding = None  # (message, suggestion)

            if isinstance(query, ast.BinOp) and isinstance(query.op, ast.Add):
                finding = (
                    "Potential SQL injection via string concatenation",
                    "Use parameterized queries with placeholders (?)",
                )
            elif isinstance(query, ast.BinOp) and isinstance(query.op, ast.Mod):
                finding = (
                    "Potential SQL injection via % string formatting",
                    "Use parameterized queries with placeholders",
                )
            elif isinstance(query, ast.JoinedStr):
                finding = (
                    "Potential SQL injection via f-string",
                    "Use parameterized queries instead of f-strings",
                )
            elif (
                isinstance(query, ast.Call)
                and isinstance(query.func, ast.Attribute)
                and query.func.attr == "format"
            ):
                finding = (
                    "Potential SQL injection via .format()",
                    "Use parameterized queries with placeholders",
                )

            if finding is not None:
                message, suggestion = finding
                issues.append(
                    self._make_issue(node, call, "sql_injection", "critical", message, suggestion)
                )

        return issues

    def _detect_command_injection(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect command and code injection risks.

        Reports calls listed in ``self.dangerous_functions`` and subprocess-style
        calls made with ``shell=True``.
        """
        issues = []

        for call in ast.walk(tree):
            if not isinstance(call, ast.Call):
                continue

            func_name = self._call_name(call)
            issue_type = self.dangerous_functions.get(func_name)

            # eval/exec/compile/__import__ are only dangerous as the built-ins;
            # unrelated methods such as re.compile() or model.eval() share the
            # name but must not be reported.
            if issue_type == "code_injection" and not self._is_builtin_call(call):
                issue_type = None

            if issue_type is not None:
                if issue_type == "code_injection":
                    suggestion = (
                        "Avoid executing dynamic code; use ast.literal_eval() "
                        "for literals or an explicit allow-list"
                    )
                else:
                    suggestion = "Use subprocess.run() with shell=False and validate inputs"
                issues.append(
                    self._make_issue(
                        node,
                        call,
                        issue_type,
                        "critical",
                        f"Unsafe use of '{func_name}()' with potential user input",
                        suggestion,
                    )
                )

            # Check for subprocess with shell=True
            if func_name in self._SHELL_CAPABLE_CALLS:
                for keyword in call.keywords:
                    if (
                        keyword.arg == "shell"
                        and isinstance(keyword.value, ast.Constant)
                        and keyword.value.value is True
                    ):
                        issues.append(
                            self._make_issue(
                                node,
                                call,
                                "command_injection",
                                "high",
                                "subprocess called with shell=True",
                                "Use shell=False and pass command as list",
                            )
                        )

        return issues

    def _detect_hardcoded_secrets(self, node: Dict) -> List[Dict]:
        """Detect hardcoded secrets in code.

        Applies ``self.secret_patterns`` case-insensitively to the node's raw
        source text; no AST is required.
        """
        issues = []
        code = node["code"]

        for pattern, secret_type in self.secret_patterns:
            for match in re.finditer(pattern, code, re.IGNORECASE):
                value = match.group(1)

                # Skip if it looks like a placeholder
                lowered = value.lower()
                if any(marker in lowered for marker in self._PLACEHOLDER_MARKERS):
                    continue

                # Skip very short values (likely not real secrets)
                if len(value) < 8:
                    continue

                issues.append({
                    "type": "hardcoded_secret",
                    "severity": "high",
                    # Newlines before the match give its offset within the node.
                    "line": node["start_line"] + code[:match.start()].count('\n'),
                    "message": f"Hardcoded {secret_type} found",
                    "suggestion": "Use environment variables or secret management (e.g., os.getenv())",
                    # Never echo the secret itself back into the report.
                    "code_snippet": f"{secret_type.lower()}=***",
                })

        return issues

    def _detect_path_traversal(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect path traversal vulnerabilities.

        Flags path-taking calls whose first argument is assembled with a binary
        operator or an f-string, i.e. a path that is built dynamically.
        """
        issues = []

        for call in ast.walk(tree):
            if not isinstance(call, ast.Call):
                continue
            if self._call_name(call) not in self._PATH_FUNCTIONS:
                continue
            if not call.args:
                continue

            # Check if path comes from string building (potential user input)
            if isinstance(call.args[0], (ast.BinOp, ast.JoinedStr)):
                issues.append(
                    self._make_issue(
                        node,
                        call,
                        "path_traversal",
                        "medium",
                        "Potential path traversal if path comes from user input",
                        "Validate and sanitize file paths, use Path.resolve() and check against allowed directories",
                    )
                )

        return issues

    def _uses_safe_yaml_loader(self, call: ast.Call) -> bool:
        """Return True when a ``yaml.load`` call names a safe loader.

        The loader may be given as the ``Loader=`` keyword or as the second
        positional argument, and either as ``yaml.SafeLoader`` or as a bare
        imported name such as ``SafeLoader``.
        """
        loader = call.args[1] if len(call.args) >= 2 else None
        for keyword in call.keywords:
            if keyword.arg == "Loader":
                loader = keyword.value

        if isinstance(loader, ast.Attribute):
            return loader.attr in self._SAFE_YAML_LOADERS
        if isinstance(loader, ast.Name):
            return loader.id in self._SAFE_YAML_LOADERS
        return False

    def _detect_unsafe_deserialization(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect unsafe deserialization (pickle, yaml)."""
        issues = []

        for call in ast.walk(tree):
            if not isinstance(call, ast.Call):
                continue
            func = call.func
            if not (isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name)):
                continue

            module_name = func.value.id

            # Check for pickle.loads, pickle.load
            if module_name == "pickle" and func.attr in ("loads", "load"):
                issues.append(
                    self._make_issue(
                        node,
                        call,
                        "unsafe_deserialization",
                        "high",
                        "Unsafe deserialization with pickle on untrusted data",
                        "Use JSON or validate data source before unpickling",
                    )
                )

            # Check for yaml.load without safe loader
            elif module_name == "yaml" and func.attr == "load":
                if not self._uses_safe_yaml_loader(call):
                    issues.append(
                        self._make_issue(
                            node,
                            call,
                            "unsafe_deserialization",
                            "high",
                            "yaml.load() without SafeLoader",
                            "Use yaml.safe_load() or yaml.load(data, Loader=yaml.SafeLoader)",
                        )
                    )

        return issues