codegraph-cli 2.0.0 (codegraph_cli-2.0.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_cli/__init__.py +4 -0
- codegraph_cli/agents.py +191 -0
- codegraph_cli/bug_detector.py +386 -0
- codegraph_cli/chat_agent.py +352 -0
- codegraph_cli/chat_session.py +220 -0
- codegraph_cli/cli.py +330 -0
- codegraph_cli/cli_chat.py +367 -0
- codegraph_cli/cli_diagnose.py +133 -0
- codegraph_cli/cli_refactor.py +230 -0
- codegraph_cli/cli_setup.py +470 -0
- codegraph_cli/cli_test.py +177 -0
- codegraph_cli/cli_v2.py +267 -0
- codegraph_cli/codegen_agent.py +265 -0
- codegraph_cli/config.py +31 -0
- codegraph_cli/config_manager.py +341 -0
- codegraph_cli/context_manager.py +500 -0
- codegraph_cli/crew_agents.py +123 -0
- codegraph_cli/crew_chat.py +159 -0
- codegraph_cli/crew_tools.py +497 -0
- codegraph_cli/diff_engine.py +265 -0
- codegraph_cli/embeddings.py +241 -0
- codegraph_cli/graph_export.py +144 -0
- codegraph_cli/llm.py +642 -0
- codegraph_cli/models.py +47 -0
- codegraph_cli/models_v2.py +185 -0
- codegraph_cli/orchestrator.py +49 -0
- codegraph_cli/parser.py +800 -0
- codegraph_cli/performance_analyzer.py +223 -0
- codegraph_cli/project_context.py +230 -0
- codegraph_cli/rag.py +200 -0
- codegraph_cli/refactor_agent.py +452 -0
- codegraph_cli/security_scanner.py +366 -0
- codegraph_cli/storage.py +390 -0
- codegraph_cli/templates/graph_interactive.html +257 -0
- codegraph_cli/testgen_agent.py +316 -0
- codegraph_cli/validation_engine.py +285 -0
- codegraph_cli/vector_store.py +293 -0
- codegraph_cli-2.0.0.dist-info/METADATA +318 -0
- codegraph_cli-2.0.0.dist-info/RECORD +43 -0
- codegraph_cli-2.0.0.dist-info/WHEEL +5 -0
- codegraph_cli-2.0.0.dist-info/entry_points.txt +2 -0
- codegraph_cli-2.0.0.dist-info/licenses/LICENSE +21 -0
- codegraph_cli-2.0.0.dist-info/top_level.txt +1 -0
codegraph_cli/agents.py
ADDED
@@ -0,0 +1,191 @@
"""Multi-agent components: graph indexing, retrieval, and summarization."""

from __future__ import annotations

from collections import deque
from pathlib import Path
from typing import Dict, List, Set

from .embeddings import HashEmbeddingModel
from .llm import LocalLLM
from .models import ImpactReport
from .parser import PythonGraphParser
from .rag import RAGRetriever
from .storage import GraphStore


class GraphAgent:
    """Responsible for parsing projects and maintaining graph memory."""

    def __init__(self, store: GraphStore, embedding_model: HashEmbeddingModel):
        self.store = store
        self.embedding_model = embedding_model

    def index_project(self, project_root: Path, show_progress: bool = True) -> Dict[str, int]:
        parser = PythonGraphParser(project_root)
        nodes, edges = parser.parse_project()

        self.store.clear()

        node_payload = []
        total_nodes = len(nodes)

        for idx, node in enumerate(nodes, 1):
            text = "\n".join([node.qualname, node.docstring, node.code])
            emb = self.embedding_model.embed_text(text)
            node_payload.append((node, emb))

            # Show progress
            if show_progress and idx % max(1, total_nodes // 20) == 0:
                progress = (idx / total_nodes) * 100
                print(f"\rIndexing: {idx}/{total_nodes} nodes ({progress:.0f}%)", end="", flush=True)

        if show_progress:
            print(f"\rIndexing: {total_nodes}/{total_nodes} nodes (100%) ")

        self.store.insert_nodes(node_payload)
        self.store.insert_edges(edges)
        self.store.set_metadata(
            {
                "project_root": str(project_root),
                "node_count": len(nodes),
                "edge_count": len(edges),
            }
        )
        return {"nodes": len(nodes), "edges": len(edges)}

    def ascii_neighbors(self, symbol: str, depth: int = 1) -> str:
        node = self.store.get_node(symbol)
        if not node:
            return f"Symbol '{symbol}' not found in current project."

        start = node["node_id"]
        lines = [f"{node['qualname']} ({node['node_type']})"]

        frontier = [(start, 0)]
        seen = {start}
        while frontier:
            current, level = frontier.pop(0)
            if level >= depth:
                continue
            for edge in self.store.neighbors(current):
                dst = edge["dst"]
                dst_node = self.store.get_node(dst)
                label = dst_node["qualname"] if dst_node else dst
                lines.append(f"{'  ' * (level + 1)}|-{edge['edge_type']}-> {label}")
                if dst not in seen:
                    seen.add(dst)
                    frontier.append((dst, level + 1))
        return "\n".join(lines)


class RAGAgent:
    """Runs semantic retrieval against project memory."""

    def __init__(self, retriever: RAGRetriever):
        self.retriever = retriever

    def semantic_search(self, query: str, top_k: int = 5, node_type: str = None):
        """Perform semantic search with optional node type filtering.

        Args:
            query: Search query
            top_k: Number of results
            node_type: Optional filter (function, class, module)
        """
        return self.retriever.search(query, top_k=top_k, node_type=node_type)

    def context_for_query(self, query: str, top_k: int = 6) -> str:
        return self.retriever.retrieve_context(query, top_k=top_k)


class SummarizationAgent:
    """Uses retrieved graph context + local LLM for reasoning/explanations."""

    def __init__(self, store: GraphStore, llm: LocalLLM):
        self.store = store
        self.llm = llm

    def impact_analysis(self, symbol: str, hops: int = 2) -> ImpactReport:
        root = self.store.get_node(symbol)
        if not root:
            message = f"Symbol '{symbol}' not found in current project."
            return ImpactReport(root=symbol, impacted=[], explanation=message, ascii_graph=message)

        root_id = root["node_id"]
        impacted_ids = self._multi_hop(root_id, hops)
        impacted_rows = [self.store.get_node(node_id) for node_id in impacted_ids]
        impacted_rows = [row for row in impacted_rows if row is not None and row["node_id"] != root_id]

        impacted_names = [row["qualname"] for row in impacted_rows]
        ascii_graph = self._impact_ascii(root_id, hops)

        prompt = self._build_impact_prompt(root, impacted_rows, ascii_graph)
        explanation = self.llm.explain(prompt)

        return ImpactReport(
            root=root["qualname"],
            impacted=impacted_names,
            explanation=explanation,
            ascii_graph=ascii_graph,
        )

    def _multi_hop(self, start_node_id: str, hops: int) -> Set[str]:
        seen = {start_node_id}
        queue = deque([(start_node_id, 0)])

        while queue:
            current, depth = queue.popleft()
            if depth >= hops:
                continue
            for edge in self.store.neighbors(current):
                nxt = edge["dst"]
                if nxt not in seen:
                    seen.add(nxt)
                    queue.append((nxt, depth + 1))
        return seen

    def _impact_ascii(self, start_node_id: str, hops: int) -> str:
        lines: List[str] = []
        queue = deque([(start_node_id, 0)])
        seen = {start_node_id}

        while queue:
            current, depth = queue.popleft()
            node = self.store.get_node(current)
            if not node:
                continue
            prefix = "  " * depth
            lines.append(f"{prefix}{node['qualname']}")
            if depth >= hops:
                continue
            for edge in self.store.neighbors(current):
                dst = edge["dst"]
                dst_node = self.store.get_node(dst)
                label = dst_node["qualname"] if dst_node else dst
                lines.append(f"{prefix}  |- {edge['edge_type']} -> {label}")
                if dst not in seen:
                    seen.add(dst)
                    queue.append((dst, depth + 1))
        return "\n".join(lines)

    def _build_impact_prompt(self, root_row, impacted_rows, ascii_graph: str) -> str:
        impacted_block = "\n".join(
            [
                f"- {row['qualname']} ({row['file_path']}:{row['start_line']})"
                for row in impacted_rows[:20]
            ]
        )
        return (
            "You are a local code reasoning assistant. "
            "Explain the likely downstream impact of changing a symbol.\n\n"
            f"Root symbol: {root_row['qualname']}\n"
            "Potentially impacted symbols:\n"
            f"{impacted_block or '- None detected'}\n\n"
            "Dependency sketch:\n"
            f"{ascii_graph}\n\n"
            "Output:\n"
            "1) Main risks\n"
            "2) Most likely breakpoints\n"
            "3) Test recommendations"
        )
codegraph_cli/bug_detector.py
ADDED
@@ -0,0 +1,386 @@
"""Bug detection using AST analysis and pattern matching."""

from __future__ import annotations

import ast
from pathlib import Path
from typing import Dict, List, Optional, Set

from .llm import LocalLLM
from .storage import GraphStore


class BugDetector:
    """Detect potential bugs using AST + LLM analysis."""

    def __init__(self, store: GraphStore, llm: Optional[LocalLLM] = None):
        self.store = store
        self.llm = llm

    def analyze_file(self, file_path: str, use_llm: bool = False) -> List[Dict]:
        """Analyze a file for potential bugs.

        Args:
            file_path: Path to file to analyze
            use_llm: Whether to use LLM for deeper analysis

        Returns:
            List of issue dictionaries with type, severity, line, message, suggestion
        """
        issues = []

        # Get all nodes in this file, but skip the module-level node when
        # individual function/class nodes exist to avoid duplicate analysis.
        nodes = [n for n in self.store.get_nodes() if n["file_path"] == file_path]
        has_children = any(n["node_type"] != "module" for n in nodes)
        if has_children:
            nodes = [n for n in nodes if n["node_type"] != "module"]

        seen: set = set()  # (line, type) dedup across all nodes

        for node in nodes:
            # Parse code to AST
            try:
                tree = ast.parse(node["code"])
            except SyntaxError:
                continue

            # Run detectors
            for issue in (
                self._detect_null_risks(tree, node)
                + self._detect_logic_errors(tree, node)
                + self._detect_resource_leaks(tree, node)
            ):
                key = (issue["line"], issue["type"])
                if key not in seen:
                    seen.add(key)
                    issues.append(issue)

        # Enhance with LLM analysis if available
        if use_llm and self.llm and issues:
            issues = self._enhance_with_llm(issues, file_path)

        return issues

    def _enhance_with_llm(self, issues: List[Dict], file_path: str) -> List[Dict]:
        """Enhance issues with LLM-powered explanations and fixes."""
        enhanced_issues = []

        for issue in issues:
            # Generate auto-fix if possible
            auto_fix = self._generate_auto_fix(issue)
            if auto_fix:
                issue["auto_fix"] = auto_fix

            # Get LLM explanation for complex issues
            if issue["severity"] in ["high", "critical"]:
                explanation = self._get_llm_explanation(issue)
                if explanation:
                    issue["llm_explanation"] = explanation

            enhanced_issues.append(issue)

        return enhanced_issues

    def _generate_auto_fix(self, issue: Dict) -> Optional[str]:
        """Generate automatic fix for the issue."""
        issue_type = issue["type"]

        if issue_type == "null_pointer_risk":
            # Extract variable name from message
            # Example: "Potential None access on 'user.name'"
            msg = issue["message"]
            if "'" in msg:
                parts = msg.split("'")
                if len(parts) >= 2:
                    var_access = parts[1]  # e.g., "user.name"
                    if "." in var_access:
                        var_name = var_access.split(".")[0]
                        return f"""if {var_name} is not None:
    # Your code here
    result = {var_access}
else:
    # Handle None case
    result = None"""

        elif issue_type == "resource_leak":
            # Suggest using 'with' statement
            if "code_snippet" in issue:
                snippet = issue["code_snippet"]
                if "open(" in snippet:
                    return f"""with {snippet} as f:
    # Your code here
    data = f.read()"""

        elif issue_type == "infinite_loop":
            return """# Add a break condition
max_iterations = 1000
iteration = 0
while True:
    iteration += 1
    if iteration >= max_iterations:
        break
    # Your loop code here"""

        return None

    def _get_llm_explanation(self, issue: Dict) -> Optional[str]:
        """Get LLM explanation for the issue."""
        if not self.llm:
            return None

        prompt = f"""Explain this code issue in 2-3 sentences:

Issue Type: {issue['type']}
Severity: {issue['severity']}
Message: {issue['message']}
Code: {issue.get('code_snippet', 'N/A')}

Provide a clear explanation of:
1. Why this is a problem
2. What could go wrong
3. Best practice to fix it"""

        try:
            explanation = self.llm.explain(prompt)
            return explanation.strip()
        except Exception:
            return None

    def _detect_null_risks(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect potential None access without checks.

        Looks for:
        - Attribute access on variables that could be None
        - Dictionary access without .get()
        - List access without bounds checking
        """
        issues = []
        seen_lines: set = set()  # Deduplicate by line number

        # Collect decorator lines so we skip attribute access on decorators
        decorator_lines: set = set()
        for ast_node in ast.walk(tree):
            if isinstance(ast_node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                for dec in ast_node.decorator_list:
                    # Mark all lines spanned by decorator expressions
                    for sub in ast.walk(dec):
                        if hasattr(sub, 'lineno'):
                            decorator_lines.add(sub.lineno)

        for ast_node in ast.walk(tree):
            # Look for attribute access that might fail
            if isinstance(ast_node, ast.Attribute):
                # Skip decorator expressions (e.g. @app.route)
                if ast_node.lineno in decorator_lines:
                    continue

                # Get the variable being accessed
                if isinstance(ast_node.value, ast.Name):
                    var_name = ast_node.value.id

                    # Skip well-known safe names
                    if var_name in ('self', 'cls', 'super', 'os', 'sys', 'math',
                                    'json', 're', 'logging', 'typing', 'pathlib'):
                        continue

                    # Deduplicate: only report once per source line
                    report_line = node["start_line"] + ast_node.lineno - 1
                    if report_line in seen_lines:
                        continue

                    # Check if this variable is assigned from a function that could return None
                    if self._could_be_none(tree, var_name, ast_node.lineno):
                        seen_lines.add(report_line)
                        issues.append({
                            "type": "null_pointer_risk",
                            "severity": "medium",
                            "line": report_line,
                            "message": f"Potential None access on '{var_name}.{ast_node.attr}'",
                            "suggestion": f"Add None check: if {var_name} is not None:",
                            "code_snippet": ast.unparse(ast_node)
                        })

        return issues

    def _could_be_none(self, tree: ast.AST, var_name: str, access_line: int) -> bool:
        """Check if a variable could be None at the point of access.

        This is a simplified heuristic that checks:
        - If variable is assigned from a function call (could return None)
        - If there's no None check between assignment and access
        - Excludes constructors (ClassName(...)) which never return None
        """
        for node in ast.walk(tree):
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name) and target.id == var_name:
                        # Check if assigned from function call
                        if isinstance(node.value, ast.Call):
                            # --- Skip class constructors / capitalized calls ---
                            # e.g. app = Flask(__name__) or data = dict()
                            call_name = self._get_call_name(node.value)
                            if call_name and call_name[0].isupper():
                                return False  # Constructor -> never None
                            # Also skip well-known stdlib constructors
                            if call_name in ('dict', 'list', 'set', 'tuple',
                                             'frozenset', 'bytearray', 'bytes',
                                             'str', 'int', 'float', 'bool',
                                             'object', 'type', 'super',
                                             'defaultdict', 'OrderedDict',
                                             'Counter', 'deque', 'namedtuple'):
                                return False
                            # Check if there's a None check before access
                            if not self._has_none_check_between(tree, var_name, node.lineno, access_line):
                                return True

        return False

    @staticmethod
    def _get_call_name(call_node: ast.Call) -> Optional[str]:
        """Extract the simple function/class name from a Call node."""
        func = call_node.func
        if isinstance(func, ast.Name):
            return func.id
        if isinstance(func, ast.Attribute):
            return func.attr
        return None

    def _has_none_check_between(self, tree: ast.AST, var_name: str, start_line: int, end_line: int) -> bool:
        """Check if there's a None check for var_name between start and end lines."""
        for node in ast.walk(tree):
            if isinstance(node, ast.If):
                # Check if this is a None check
                if hasattr(node, 'lineno') and start_line < node.lineno < end_line:
                    # Look for patterns like: if var is not None, if var, etc.
                    test = node.test
                    if isinstance(test, ast.Compare):
                        if isinstance(test.left, ast.Name) and test.left.id == var_name:
                            # Check for "is not None" pattern
                            if any(isinstance(op, ast.IsNot) for op in test.ops):
                                return True
                    elif isinstance(test, ast.Name) and test.id == var_name:
                        # Simple truthiness check
                        return True

        return False

    def _detect_logic_errors(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect logic errors like unreachable code and infinite loops."""
        issues = []

        for ast_node in ast.walk(tree):
            # Detect unreachable code after return
            if isinstance(ast_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                for i, stmt in enumerate(ast_node.body):
                    if isinstance(stmt, ast.Return) and i < len(ast_node.body) - 1:
                        # Check if next statement is not just a comment or pass
                        next_stmt = ast_node.body[i + 1]
                        if not isinstance(next_stmt, ast.Pass):
                            issues.append({
                                "type": "unreachable_code",
                                "severity": "low",
                                "line": node["start_line"] + next_stmt.lineno - 1,
                                "message": "Unreachable code after return statement",
                                "suggestion": "Remove unreachable code or move return to end",
                                "code_snippet": ast.unparse(next_stmt)[:100]
                            })

            # Detect infinite loops
            if isinstance(ast_node, ast.While):
                if isinstance(ast_node.test, ast.Constant) and ast_node.test.value is True:
                    # Check if there's a break statement
                    has_break = any(isinstance(n, ast.Break) for n in ast.walk(ast_node))
                    if not has_break:
                        issues.append({
                            "type": "infinite_loop",
                            "severity": "high",
                            "line": node["start_line"] + ast_node.lineno - 1,
                            "message": "Potential infinite loop without break statement",
                            "suggestion": "Add break condition or change loop condition",
                            "code_snippet": f"while True: ..."
                        })

            # Detect comparison with True/False (code smell)
            if isinstance(ast_node, ast.Compare):
                for op, comparator in zip(ast_node.ops, ast_node.comparators):
                    if isinstance(comparator, ast.Constant) and isinstance(comparator.value, bool):
                        if isinstance(op, ast.Eq):
                            issues.append({
                                "type": "boolean_comparison",
                                "severity": "low",
                                "line": node["start_line"] + ast_node.lineno - 1,
                                "message": f"Unnecessary comparison with {comparator.value}",
                                "suggestion": "Use variable directly in condition",
                                "code_snippet": ast.unparse(ast_node)
                            })

        return issues

    def _detect_resource_leaks(self, tree: ast.AST, node: Dict) -> List[Dict]:
        """Detect unclosed resources (files, connections)."""
        issues = []

        # Track 'with' statement contexts
        with_contexts: Set[int] = set()
        for ast_node in ast.walk(tree):
            if isinstance(ast_node, ast.With):
                with_contexts.add(id(ast_node))

        # Look for resource-opening calls
        for ast_node in ast.walk(tree):
            if isinstance(ast_node, ast.Call):
                func_name = None

                if isinstance(ast_node.func, ast.Name):
                    func_name = ast_node.func.id
                elif isinstance(ast_node.func, ast.Attribute):
                    func_name = ast_node.func.attr

                # Check for file operations
                if func_name == "open":
                    # Check if this call is inside a 'with' statement
                    is_in_with = self._is_inside_with(tree, ast_node)

                    if not is_in_with:
                        issues.append({
                            "type": "resource_leak",
                            "severity": "medium",
                            "line": node["start_line"] + ast_node.lineno - 1,
                            "message": "File opened without 'with' statement",
                            "suggestion": "Use 'with open(...) as f:' to ensure file is closed",
                            "code_snippet": ast.unparse(ast_node)
                        })

        return issues

    def _is_inside_with(self, tree: ast.AST, target: ast.AST) -> bool:
        """Check if target node is inside a 'with' statement."""
        # This is a simplified check - proper implementation would need
        # to track the AST hierarchy
        for node in ast.walk(tree):
            if isinstance(node, ast.With):
                # Check if target is in the body
                for item in node.items:
                    if item.context_expr == target:
                        return True
        return False

    def analyze_project(self) -> Dict[str, List[Dict]]:
        """Analyze entire project for bugs.

        Returns:
            Dictionary mapping file paths to lists of issues
        """
        results = {}

        # Get all unique file paths
        all_nodes = self.store.get_nodes()
        file_paths = set(node["file_path"] for node in all_nodes)

        for file_path in file_paths:
            issues = self.analyze_file(file_path)
            if issues:
                results[file_path] = issues

        return results