code2flow-toon 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2flow/__init__.py +47 -0
- code2flow/__main__.py +6 -0
- code2flow/analysis/__init__.py +17 -0
- code2flow/analysis/call_graph.py +210 -0
- code2flow/analysis/cfg.py +293 -0
- code2flow/analysis/coupling.py +77 -0
- code2flow/analysis/data_analysis.py +249 -0
- code2flow/analysis/dfg.py +224 -0
- code2flow/analysis/smells.py +192 -0
- code2flow/cli.py +464 -0
- code2flow/core/__init__.py +36 -0
- code2flow/core/analyzer.py +765 -0
- code2flow/core/config.py +177 -0
- code2flow/core/models.py +194 -0
- code2flow/core/streaming_analyzer.py +666 -0
- code2flow/exporters/__init__.py +17 -0
- code2flow/exporters/base.py +13 -0
- code2flow/exporters/json_exporter.py +17 -0
- code2flow/exporters/llm_exporter.py +199 -0
- code2flow/exporters/mermaid_exporter.py +67 -0
- code2flow/exporters/toon.py +401 -0
- code2flow/exporters/yaml_exporter.py +108 -0
- code2flow/llm_flow_generator.py +451 -0
- code2flow/llm_task_generator.py +263 -0
- code2flow/mermaid_generator.py +481 -0
- code2flow/nlp/__init__.py +23 -0
- code2flow/nlp/config.py +174 -0
- code2flow/nlp/entity_resolution.py +326 -0
- code2flow/nlp/intent_matching.py +297 -0
- code2flow/nlp/normalization.py +122 -0
- code2flow/nlp/pipeline.py +388 -0
- code2flow/patterns/__init__.py +0 -0
- code2flow/patterns/detector.py +168 -0
- code2flow/refactor/__init__.py +0 -0
- code2flow/refactor/prompt_engine.py +150 -0
- code2flow/visualizers/__init__.py +0 -0
- code2flow/visualizers/graph.py +196 -0
- code2flow_toon-0.2.4.dist-info/METADATA +599 -0
- code2flow_toon-0.2.4.dist-info/RECORD +43 -0
- code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
- code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
- code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
- code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
code2flow/core/analyzer.py
@@ -0,0 +1,765 @@
"""Optimized project analyzer with caching and parallel processing."""

import ast
import hashlib
import json
import os
import pickle
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import lru_cache
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

from radon.complexity import cc_visit, cc_rank
import fnmatch
import networkx as nx
import vulture

from .config import Config, FAST_CONFIG, FilterConfig
from .models import (
    AnalysisResult, ClassInfo, FlowEdge, FlowNode,
    FunctionInfo, ModuleInfo, Pattern
)
from ..analysis.dfg import DFGExtractor
from ..analysis.call_graph import CallGraphExtractor
from ..analysis.coupling import CouplingAnalyzer
from ..analysis.smells import SmellDetector


class FileCache:
    """Cache for parsed AST files."""

    def __init__(self, cache_dir: str = ".code2flow_cache", ttl_hours: int = 24):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl_seconds = ttl_hours * 3600

    def _get_cache_key(self, file_path: str, content: str) -> str:
        """Generate cache key from file path and content hash."""
        content_hash = hashlib.md5(content.encode()).hexdigest()[:16]
        return f"{Path(file_path).stem}_{content_hash}"

    def _get_cache_path(self, cache_key: str) -> Path:
        """Get cache file path."""
        return self.cache_dir / f"{cache_key}.pkl"

    def get(self, file_path: str, content: str) -> Optional[Tuple[ast.AST, str]]:
        """Get cached AST if available and not expired."""
        cache_key = self._get_cache_key(file_path, content)
        cache_path = self._get_cache_path(cache_key)

        if not cache_path.exists():
            return None

        # Check TTL
        age = time.time() - cache_path.stat().st_mtime
        if age > self.ttl_seconds:
            cache_path.unlink()
            return None

        try:
            with open(cache_path, 'rb') as f:
                return pickle.load(f)
        except Exception:
            return None

    def put(self, file_path: str, content: str, data: Tuple[ast.AST, str]) -> None:
        """Store AST in cache."""
        cache_key = self._get_cache_key(file_path, content)
        cache_path = self._get_cache_path(cache_key)

        try:
            with open(cache_path, 'wb') as f:
                pickle.dump(data, f)
        except Exception:
            pass

    def clear(self) -> None:
        """Clear all cached files."""
        for f in self.cache_dir.glob("*.pkl"):
            f.unlink()

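A quick sketch of how this cache behaves (editor's illustration, not part of the shipped file): the key couples the file stem with an MD5 prefix of the content, so any edit yields a new key, and stale entries age out via the TTL check in get(). The import path follows the RECORD listing above; it assumes the wheel is installed and that the parsed AST pickles cleanly.

import ast
from code2flow.core.analyzer import FileCache

src = "def greet(name):\n    return name.title()\n"
cache = FileCache(cache_dir=".demo_cache", ttl_hours=1)
cache.put("demo.py", src, (ast.parse(src), src))
assert cache.get("demo.py", src) is not None           # same content: hit
assert cache.get("demo.py", src + "# edit") is None    # new hash: miss
cache.clear()
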
class FastFileFilter:
    """Fast file filtering with pattern matching."""

    def __init__(self, config: FilterConfig):
        self.config = config
        self._exclude_patterns = [p.lower() for p in config.exclude_patterns]
        self._include_patterns = [p.lower() for p in config.include_patterns]

    def should_process(self, file_path: str) -> bool:
        """Check if file should be processed."""
        path_lower = file_path.lower()

        # Check exclude patterns
        for pattern in self._exclude_patterns:
            if fnmatch.fnmatch(path_lower, pattern) or pattern in path_lower:
                return False

        # Check include patterns (if any)
        if self._include_patterns:
            return any(
                fnmatch.fnmatch(path_lower, p) or p in path_lower
                for p in self._include_patterns
            )

        return True

    def should_skip_function(self, name: str, line_count: int, is_private: bool = False,
                             is_property: bool = False, is_accessor: bool = False) -> bool:
        """Check if function should be skipped."""
        if line_count < self.config.min_function_lines:
            return True
        if self.config.skip_private and is_private:
            return True
        if self.config.skip_properties and is_property:
            return True
        if self.config.skip_accessors and is_accessor:
            return True
        return False

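The per-pattern test in should_process is intentionally loose: a pattern matches either as an fnmatch glob or as a bare substring, both case-insensitive. A standalone restatement of that predicate (editor's illustration; the FilterConfig fields are taken from their usage above):

import fnmatch

def pattern_hits(path: str, pattern: str) -> bool:
    # The exact per-pattern test should_process applies: glob OR substring.
    path, pattern = path.lower(), pattern.lower()
    return fnmatch.fnmatch(path, pattern) or pattern in path

assert pattern_hits("src/tests/test_api.py", "*test*")   # glob match
assert pattern_hits("src/Tests/helpers.py", "tests")     # substring, case-folded
assert not pattern_hits("src/main.py", "*.pyc")
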
class FileAnalyzer:
    """Analyzes a single file."""

    def __init__(self, config: Config, cache: Optional[FileCache] = None):
        self.config = config
        self.cache = cache
        self.stats = {
            'files_processed': 0,
            'functions_found': 0,
            'classes_found': 0,
            'nodes_created': 0,
            'cache_hits': 0,
        }

    def analyze_file(self, file_path: str, module_name: str) -> Dict:
        """Analyze a single Python file."""
        path = Path(file_path)
        if not path.exists():
            return {}

        try:
            content = path.read_text(encoding='utf-8', errors='ignore')
        except Exception:
            return {}

        # Try cache
        if self.cache and self.config.performance.enable_cache:
            cached = self.cache.get(file_path, content)
            if cached:
                self.stats['cache_hits'] += 1
                ast_tree, _ = cached
            else:
                try:
                    ast_tree = ast.parse(content)
                    self.cache.put(file_path, content, (ast_tree, content))
                except SyntaxError:
                    return {}
        else:
            try:
                ast_tree = ast.parse(content)
            except SyntaxError:
                return {}

        result = self._analyze_ast(ast_tree, file_path, module_name, content)
        self.stats['files_processed'] += 1
        return result

    def _analyze_ast(self, tree: ast.AST, file_path: str, module_name: str, content: str) -> Dict:
        """Analyze AST and extract structure."""
        result = {
            'module': ModuleInfo(
                name=module_name,
                file=file_path,
                is_package=Path(file_path).name == '__init__.py'
            ),
            'functions': {},
            'classes': {},
            'nodes': {},
            'edges': [],
        }

        lines = content.split('\n')

        for node in tree.body:
            if isinstance(node, ast.ClassDef):
                self._process_class(node, file_path, module_name, result, lines)
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._process_function(node, file_path, module_name, result, lines, None)

        # Calculate complexity with radon
        try:
            complexity_results = cc_visit(content)
            for entry in complexity_results:
                # Radon returns a list of objects (Function, Class, Method)
                name = getattr(entry, 'name', '')
                classname = getattr(entry, 'classname', None)

                if classname:
                    full_name = f"{module_name}.{classname}.{name}"
                else:
                    full_name = f"{module_name}.{name}"

                if full_name in result['functions']:
                    result['functions'][full_name].complexity = {
                        'cyclomatic_complexity': entry.complexity,
                        'cc_rank': cc_rank(entry.complexity)
                    }
                elif full_name in result['classes']:
                    # We can store class complexity too if needed
                    result['classes'][full_name].is_state_machine = (
                        result['classes'][full_name].is_state_machine or (entry.complexity > 20)
                    )
        except Exception as e:
            if self.config.verbose:
                print(f"Error calculating complexity for {file_path}: {e}")

        # New: deep analysis for refactoring
        try:
            dfg_ext = DFGExtractor(self.config)
            dfg_res = dfg_ext.extract(tree, module_name, file_path)
            result['mutations'] = dfg_res.mutations
            result['data_flows'] = dfg_res.data_flows

            # Update function calls from the call-graph extractor, which is more robust
            cg_ext = CallGraphExtractor(self.config)
            cg_res = cg_ext.extract(tree, module_name, file_path)
            for func_name, cg_func in cg_res.functions.items():
                if func_name in result['functions']:
                    result['functions'][func_name].calls.extend(list(cg_func.calls))
        except Exception as e:
            if self.config.verbose:
                print(f"Error in deep analysis for {file_path}: {e}")

        self.stats['files_processed'] += 1
        return result

    def _process_class(self, node: ast.ClassDef, file_path: str, module_name: str,
                       result: Dict, lines: List[str]) -> None:
        """Process class definition."""
        class_name = node.name
        qualified_name = f"{module_name}.{class_name}"

        methods = []
        for item in node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                method_name = item.name
                qualified_method = f"{qualified_name}.{method_name}"
                methods.append(qualified_method)
                self._process_function(item, file_path, module_name, result, lines, class_name)

        result['classes'][qualified_name] = ClassInfo(
            name=class_name,
            qualified_name=qualified_name,
            file=file_path,
            line=node.lineno,
            module=module_name,
            bases=[self._get_base_name(b) for b in node.bases],
            methods=methods,
            docstring=ast.get_docstring(node),
        )
        result['module'].classes.append(qualified_name)
        self.stats['classes_found'] += 1

    def _process_function(self, node: ast.FunctionDef, file_path: str, module_name: str,
                          result: Dict, lines: List[str], class_name: Optional[str]) -> None:
        """Process function definition with limited CFG depth."""
        func_name = node.name
        if class_name:
            qualified_name = f"{module_name}.{class_name}.{func_name}"
        else:
            qualified_name = f"{module_name}.{func_name}"

        # Check filtering - use FastFileFilter for function-level filtering
        line_count = (node.end_lineno - node.lineno + 1) if node.end_lineno else 1
        is_private = func_name.startswith('_')
        is_property = any(
            isinstance(d, ast.Name) and d.id == 'property'
            for d in node.decorator_list
        )

        filter_obj = FastFileFilter(self.config.filters)
        if filter_obj.should_skip_function(func_name, line_count, is_private, is_property):
            return

        # Create function info
        func_info = FunctionInfo(
            name=func_name,
            qualified_name=qualified_name,
            file=file_path,
            line=node.lineno,
            column=node.col_offset,
            module=module_name,
            class_name=class_name,
            is_method=class_name is not None,
            is_private=is_private,
            is_property=is_property,
            docstring=ast.get_docstring(node),
            args=[arg.arg for arg in node.args.args],
            decorators=[self._get_decorator_name(d) for d in node.decorator_list],
        )

        # Build simplified CFG with depth limit
        if not self.config.performance.skip_data_flow:
            self._build_cfg(node, qualified_name, func_info, result)

        # Find calls
        for child in ast.walk(node):
            if isinstance(child, ast.Call):
                called_name = self._get_call_name(child.func)
                if called_name:
                    func_info.calls.append(called_name)

        result['functions'][qualified_name] = func_info
        result['module'].functions.append(qualified_name)
        self.stats['functions_found'] += 1

    def _build_cfg(self, node: ast.FunctionDef, func_name: str,
                   func_info: FunctionInfo, result: Dict) -> None:
        """Build simplified control flow graph with depth limit."""
        max_depth = self.config.depth.max_cfg_depth

        entry_id = f"{func_name}_entry"
        exit_id = f"{func_name}_exit"

        # Create entry/exit nodes
        result['nodes'][entry_id] = FlowNode(
            id=entry_id, type='ENTRY', label='entry', function=func_name
        )
        result['nodes'][exit_id] = FlowNode(
            id=exit_id, type='EXIT', label='exit', function=func_name
        )

        func_info.cfg_nodes.extend([entry_id, exit_id])

        func_info.cfg_entry = entry_id
        func_info.cfg_exit = exit_id

        # Build CFG with depth limiting
        self._process_cfg_block(node.body, entry_id, exit_id, func_name,
                                func_info, result, depth=0, max_depth=max_depth)

        self.stats['nodes_created'] += len(result['nodes'])

    def _process_cfg_block(self, body: List[ast.stmt], entry: str, exit: str,
                           func_name: str, func_info: FunctionInfo, result: Dict,
                           depth: int, max_depth: int) -> str:
        """Process a block of statements for CFG with depth limiting."""
        if depth >= max_depth:
            # Connect directly to exit if depth exceeded
            result['edges'].append(FlowEdge(source=entry, target=exit))
            return exit

        current = entry
        for stmt in body:
            if isinstance(stmt, ast.If):
                # Create branch node
                node_id = f"{func_name}_if_{stmt.lineno}"
                result['nodes'][node_id] = FlowNode(
                    id=node_id, type='IF', label='if', function=func_name,
                    line=stmt.lineno
                )
                func_info.cfg_nodes.append(node_id)
                result['edges'].append(FlowEdge(source=current, target=node_id))

                # Process branches
                then_exit = self._process_cfg_block(
                    stmt.body, node_id, exit, func_name, func_info, result, depth + 1, max_depth
                )
                if stmt.orelse:
                    else_exit = self._process_cfg_block(
                        stmt.orelse, node_id, exit, func_name, func_info, result, depth + 1, max_depth
                    )
                else:
                    else_exit = node_id

                # Merge point
                current = f"{func_name}_merge_{stmt.lineno}"
                result['nodes'][current] = FlowNode(
                    id=current, type='FUNC', label='merge', function=func_name
                )
                func_info.cfg_nodes.append(current)
                result['edges'].append(FlowEdge(source=then_exit, target=current))
                if else_exit != node_id:
                    result['edges'].append(FlowEdge(source=else_exit, target=current))

            elif isinstance(stmt, (ast.For, ast.While)):
                node_id = f"{func_name}_loop_{stmt.lineno}"
                loop_type = 'FOR' if isinstance(stmt, ast.For) else 'WHILE'
                result['nodes'][node_id] = FlowNode(
                    id=node_id, type=loop_type, label=loop_type.lower(),
                    function=func_name, line=stmt.lineno
                )
                func_info.cfg_nodes.append(node_id)
                result['edges'].append(FlowEdge(source=current, target=node_id))

                # Limit loop body depth even more
                self._process_cfg_block(
                    stmt.body, node_id, node_id, func_name, func_info, result, depth + 2, max_depth
                )
                current = node_id

            elif isinstance(stmt, ast.Return):
                node_id = f"{func_name}_return_{stmt.lineno}"
                result['nodes'][node_id] = FlowNode(
                    id=node_id, type='RETURN', label='return',
                    function=func_name, line=stmt.lineno
                )
                func_info.cfg_nodes.append(node_id)
                result['edges'].append(FlowEdge(source=current, target=node_id))
                result['edges'].append(FlowEdge(source=node_id, target=exit))
                return exit

        if current != exit:
            result['edges'].append(FlowEdge(source=current, target=exit))

        return exit

    def _get_base_name(self, node: ast.expr) -> str:
        """Extract base class name."""
        if isinstance(node, ast.Name):
            return node.id
        elif isinstance(node, ast.Attribute):
            return f"{self._get_base_name(node.value)}.{node.attr}"
        return str(node)

    def _get_decorator_name(self, node: ast.expr) -> str:
        """Extract decorator name."""
        if isinstance(node, ast.Name):
            return node.id
        elif isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
            return node.func.id
        return ""

    def _get_call_name(self, node: ast.expr) -> Optional[str]:
        """Extract function name from call."""
        if isinstance(node, ast.Name):
            return node.id
        elif isinstance(node, ast.Attribute):
            return f"{self._get_call_name(node.value)}.{node.attr}"
        return None

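_get_call_name is what feeds the call graph: it unwinds Attribute chains into dotted names. A self-contained sketch of the same traversal (editor's illustration; the None-guard on the recursive base is a small hardening, since the original concatenates unconditionally):

import ast
from typing import Optional

def call_name(node: ast.expr) -> Optional[str]:
    # Unwind attribute chains (a.b.c()) into dotted names, as _get_call_name does.
    if isinstance(node, ast.Name):
        return node.id
    if isinstance(node, ast.Attribute):
        base = call_name(node.value)
        return f"{base}.{node.attr}" if base else None
    return None  # lambdas, subscripts, calls-of-calls stay unresolved

tree = ast.parse("db.session.commit()\nprint('done')")
found = [call_name(n.func) for n in ast.walk(tree) if isinstance(n, ast.Call)]
assert found == ["db.session.commit", "print"]
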
def _analyze_single_file(args):
    """Analyze a single file - module-level function for pickle compatibility."""
    file_path, module_name, config_dict = args
    from .config import Config
    config = Config(**config_dict)
    analyzer = FileAnalyzer(config, None)
    return analyzer.analyze_file(file_path, module_name)

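The helper above sits at module level because ProcessPoolExecutor pickles the callable and its arguments to ship them to worker processes; bound methods and full Config objects are awkward to pickle, so the config travels as a plain dict. The same pattern in miniature (editor's illustration; the worker is hypothetical):

from concurrent.futures import ProcessPoolExecutor

def _square(args):  # module-level, so child processes can unpickle it
    label, value = args
    return label, value * value

if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=2) as pool:
        print(dict(pool.map(_square, [("a", 2), ("b", 3)])))  # {'a': 4, 'b': 9}
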
class ProjectAnalyzer:
    """Main analyzer with parallel processing."""

    def __init__(self, config: Optional[Config] = None):
        self.config = config or FAST_CONFIG
        self.cache = FileCache(
            self.config.performance.cache_dir,
            self.config.performance.cache_ttl_hours
        ) if self.config.performance.enable_cache else None
        self.file_filter = FastFileFilter(self.config.filters)

    def analyze_project(self, project_path: str) -> AnalysisResult:
        """Analyze entire project."""
        start_time = time.time()

        project_path = Path(project_path).resolve()
        if not project_path.exists():
            raise FileNotFoundError(f"Project path does not exist: {project_path}")

        # Collect Python files
        files = self._collect_files(project_path)

        if self.config.verbose:
            print(f"Found {len(files)} files to analyze")

        # Analyze files
        if self.config.performance.parallel_enabled and len(files) > 1:
            results = self._analyze_parallel(files)
        else:
            results = self._analyze_sequential(files)

        # Merge results
        merged = self._merge_results(results, str(project_path))

        # Build call graph
        self._build_call_graph(merged)

        if not self.config.performance.skip_pattern_detection:
            self._detect_patterns(merged)

        # New: refactoring analysis
        self._perform_refactoring_analysis(merged)

        # Calculate stats
        elapsed = time.time() - start_time
        merged.stats = {
            'files_processed': len(files),
            'functions_found': len(merged.functions),
            'classes_found': len(merged.classes),
            'nodes_created': len(merged.nodes),
            'edges_created': len(merged.edges),
            'patterns_detected': len(merged.patterns),
            'analysis_time_seconds': round(elapsed, 2),
            'cache_hits': sum(r.get('cache_hits', 0) for r in results),
        }

        if self.config.verbose:
            print(f"Analysis complete in {elapsed:.2f}s")
            print(f"  Functions: {len(merged.functions)}")
            print(f"  Classes: {len(merged.classes)}")
            print(f"  CFG Nodes: {len(merged.nodes)}")
            print(f"  Patterns: {len(merged.patterns)}")

        return merged

    def _collect_files(self, project_path: Path) -> List[Tuple[str, str]]:
        """Collect all Python files with their module names."""
        files = []

        for py_file in project_path.rglob("*.py"):
            file_str = str(py_file)
            if not self.file_filter.should_process(file_str):
                continue

            # Calculate module name
            rel_path = py_file.relative_to(project_path)
            parts = list(rel_path.parts)[:-1]  # drop the filename component
            if py_file.name == '__init__.py':
                module_name = '.'.join(parts) if parts else project_path.name
            else:
                module_name = '.'.join(parts + [py_file.stem])

            files.append((file_str, module_name))

        return files

    def _analyze_parallel(self, files: List[Tuple[str, str]]) -> List[Dict]:
        """Analyze files in parallel."""
        results = []
        workers = min(self.config.performance.parallel_workers, len(files))

        # Convert config to dict for pickle compatibility
        config_dict = {
            'mode': self.config.mode,
            'max_depth_enumeration': self.config.max_depth_enumeration,
            'detect_state_machines': self.config.detect_state_machines,
            'detect_recursion': self.config.detect_recursion,
            'output_dir': self.config.output_dir,
        }

        # Prepare args with config dict
        args_list = [(f[0], f[1], config_dict) for f in files]

        with ProcessPoolExecutor(max_workers=workers) as executor:
            futures = {executor.submit(_analyze_single_file, a): a for a in args_list}

            for future in as_completed(futures):
                try:
                    result = future.result()
                    if result:
                        results.append(result)
                except Exception as e:
                    if self.config.verbose:
                        print(f"Error analyzing {futures[future]}: {e}")

        return results

    def _analyze_sequential(self, files: List[Tuple[str, str]]) -> List[Dict]:
        """Analyze files sequentially."""
        results = []
        analyzer = FileAnalyzer(self.config, self.cache)

        for file_path, module_name in files:
            result = analyzer.analyze_file(file_path, module_name)
            if result:
                results.append(result)

        return results

    def _merge_results(self, results: List[Dict], project_path: str) -> AnalysisResult:
        """Merge all file analysis results."""
        merged = AnalysisResult(
            project_path=project_path,
            analysis_mode=self.config.mode,
        )

        for r in results:
            if 'module' in r:
                mod = r['module']
                merged.modules[mod.name] = mod
            if 'functions' in r:
                merged.functions.update(r['functions'])
            if 'classes' in r:
                merged.classes.update(r['classes'])
            if 'nodes' in r:
                merged.nodes.update(r['nodes'])
            if 'edges' in r:
                merged.edges.extend(r['edges'])
            if 'mutations' in r:
                merged.mutations.extend(r['mutations'])
            if 'data_flows' in r:
                merged.data_flows.update(r['data_flows'])

        return merged

    def _build_call_graph(self, result: AnalysisResult) -> None:
        """Build call graph and find entry points."""
        # Map calls between functions
        for func_name, func in result.functions.items():
            for called in func.calls:
                # Try to resolve to a known function
                for known_name in result.functions:
                    if known_name.endswith(f".{called}") or known_name == called:
                        func.calls[func.calls.index(called)] = known_name
                        result.functions[known_name].called_by.append(func_name)
                        break

        # Find entry points (not called by anything)
        for func_name, func in result.functions.items():
            if not func.called_by:
                result.entry_points.append(func_name)

    def _detect_patterns(self, result: AnalysisResult) -> None:
        """Detect behavioral patterns."""
        # Detect recursion
        for func_name, func in result.functions.items():
            if func_name in func.calls:
                result.patterns.append(Pattern(
                    name=f"recursion_{func.name}",
                    type="recursion",
                    confidence=0.9,
                    functions=[func_name],
                    entry_points=[func_name],
                ))

        # Detect state machines (simple heuristic)
        for class_name, cls in result.classes.items():
            state_methods = [m for m in cls.methods if any(
                s in m.lower() for s in ['state', 'transition', 'enter', 'exit', 'connect', 'disconnect']
            )]
            if len(state_methods) >= 2:
                cls.is_state_machine = True
                result.patterns.append(Pattern(
                    name=f"state_machine_{cls.name}",
                    type="state_machine",
                    confidence=0.7,
                    functions=cls.methods,
                    entry_points=cls.methods[:1],
                ))

    def _perform_refactoring_analysis(self, result: AnalysisResult) -> None:
        """Perform deep analysis and detect code smells."""
        if self.config.verbose:
            print("Performing refactoring analysis...")

        # 1. Calculate metrics (fan-in/fan-out)
        cg_ext = CallGraphExtractor(self.config)
        cg_ext.result = result
        cg_ext._calculate_metrics()

        # 2. Build networkx graph for project-level analysis
        G = nx.DiGraph()
        for func_name, func_info in result.functions.items():
            G.add_node(func_name)
            for callee in func_info.calls:
                G.add_edge(func_name, callee)

        # 3. Calculate betweenness centrality (bottlenecks)
        if len(G) > 0:
            try:
                centrality = nx.betweenness_centrality(G)
                for func_name, score in centrality.items():
                    if func_name in result.functions:
                        result.functions[func_name].centrality = score
            except Exception as e:
                if self.config.verbose:
                    print(f"Error calculating centrality: {e}")

        # 4. Detect circular dependencies
        try:
            cycles = list(nx.simple_cycles(G))
            if cycles:
                result.metrics["project"] = result.metrics.get("project", {})
                result.metrics["project"]["circular_dependencies"] = cycles
        except Exception as e:
            if self.config.verbose:
                print(f"Error detecting cycles: {e}")

        # 5. Community detection (module groups)
        try:
            from networkx.algorithms import community
            # Use Louvain if available, otherwise greedy modularity
            if hasattr(community, 'louvain_communities'):
                communities = community.louvain_communities(G.to_undirected())
            else:
                communities = community.greedy_modularity_communities(G.to_undirected())

            result.coupling["communities"] = [list(c) for c in communities]
        except Exception as e:
            if self.config.verbose:
                print(f"Error in community detection: {e}")

        # 6. Analyze coupling
        coupling_analyzer = CouplingAnalyzer(result)
        coupling_analyzer.analyze()

        # 7. Detect code smells
        smell_detector = SmellDetector(result)
        smell_detector.detect()

        # 8. Dead-code detection with vulture
        self._detect_dead_code(result)

        if self.config.verbose:
            print(f"  Detected {len(result.smells)} code smells")

    def _detect_dead_code(self, result: AnalysisResult) -> None:
        """Use vulture to find dead code and update reachability."""
        if self.config.verbose:
            print("Detecting dead code with vulture...")

        try:
            v = vulture.Vulture(verbose=False)

            # vulture.scan takes the code content as a string
            for py_file in Path(result.project_path).rglob("*.py"):
                if not self.file_filter.should_process(str(py_file)):
                    continue
                try:
                    content = py_file.read_text(encoding='utf-8', errors='ignore')
                    v.scan(content, filename=str(py_file))
                except Exception:
                    continue

            dead_code = v.get_unused_code()

            if self.config.verbose:
                print(f"  Vulture found {len(dead_code)} unused items")

            # Map unused code to our functions/classes
            for item in dead_code:
                # Match by file and line
                item_path = Path(item.filename).resolve()
                item_lineno = getattr(item, 'lineno', getattr(item, 'first_lineno', 0))

                if self.config.verbose:
                    print(f"  Vulture item: {item.filename}:{item_lineno} ({item.typ})")

                for func_name, func_info in result.functions.items():
                    func_path = Path(func_info.file).resolve()
                    if func_path == item_path and func_info.line == item_lineno:
                        func_info.reachability = "unreachable"

                for class_name, class_info in result.classes.items():
                    if Path(class_info.file).resolve() == item_path and class_info.line == item_lineno:
                        class_info.reachability = "unreachable"  # (if we add reachability to ClassInfo too)

            # Mark the rest as reachable if they are NOT orphans
            for func_name, func_info in result.functions.items():
                if func_info.reachability == "unknown":
                    if func_info.called_by or func_name in result.entry_points:
                        func_info.reachability = "reachable"
        except Exception as e:
            if self.config.verbose:
                print(f"Error in dead code detection: {e}")
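
To close, a minimal end-to-end sketch of driving this module (editor's illustration; it relies only on names defined above - ProjectAnalyzer falls back to FAST_CONFIG, and analyze_project returns an AnalysisResult whose stats dict is filled in at the end of the run). "path/to/project" is a placeholder:

from code2flow.core.analyzer import ProjectAnalyzer

analyzer = ProjectAnalyzer()                     # defaults to FAST_CONFIG
result = analyzer.analyze_project("path/to/project")

print(result.stats["functions_found"], "functions in",
      result.stats["analysis_time_seconds"], "s")
for name in result.entry_points[:10]:            # functions nothing else calls
    print("entry point:", name)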