code2flow-toon 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +17 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/smells.py +192 -0
  10. code2flow/cli.py +464 -0
  11. code2flow/core/__init__.py +36 -0
  12. code2flow/core/analyzer.py +765 -0
  13. code2flow/core/config.py +177 -0
  14. code2flow/core/models.py +194 -0
  15. code2flow/core/streaming_analyzer.py +666 -0
  16. code2flow/exporters/__init__.py +17 -0
  17. code2flow/exporters/base.py +13 -0
  18. code2flow/exporters/json_exporter.py +17 -0
  19. code2flow/exporters/llm_exporter.py +199 -0
  20. code2flow/exporters/mermaid_exporter.py +67 -0
  21. code2flow/exporters/toon.py +401 -0
  22. code2flow/exporters/yaml_exporter.py +108 -0
  23. code2flow/llm_flow_generator.py +451 -0
  24. code2flow/llm_task_generator.py +263 -0
  25. code2flow/mermaid_generator.py +481 -0
  26. code2flow/nlp/__init__.py +23 -0
  27. code2flow/nlp/config.py +174 -0
  28. code2flow/nlp/entity_resolution.py +326 -0
  29. code2flow/nlp/intent_matching.py +297 -0
  30. code2flow/nlp/normalization.py +122 -0
  31. code2flow/nlp/pipeline.py +388 -0
  32. code2flow/patterns/__init__.py +0 -0
  33. code2flow/patterns/detector.py +168 -0
  34. code2flow/refactor/__init__.py +0 -0
  35. code2flow/refactor/prompt_engine.py +150 -0
  36. code2flow/visualizers/__init__.py +0 -0
  37. code2flow/visualizers/graph.py +196 -0
  38. code2flow_toon-0.2.4.dist-info/METADATA +599 -0
  39. code2flow_toon-0.2.4.dist-info/RECORD +43 -0
  40. code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
  41. code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
  42. code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
  43. code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,765 @@
1
+ """Optimized project analyzer with caching and parallel processing."""
2
+
3
+ import ast
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import pickle
8
+ import time
9
+ from concurrent.futures import ProcessPoolExecutor, as_completed
10
+ from functools import lru_cache
11
+ from pathlib import Path
12
+ from typing import Dict, List, Optional, Set, Tuple
13
+ from radon.complexity import cc_visit, cc_rank
14
+ import fnmatch
15
+ import networkx as nx
16
+ import vulture
17
+
18
+ from .config import Config, FAST_CONFIG, FilterConfig
19
+ from .models import (
20
+ AnalysisResult, ClassInfo, FlowEdge, FlowNode,
21
+ FunctionInfo, ModuleInfo, Pattern
22
+ )
23
+ from ..analysis.dfg import DFGExtractor
24
+ from ..analysis.call_graph import CallGraphExtractor
25
+ from ..analysis.coupling import CouplingAnalyzer
26
+ from ..analysis.smells import SmellDetector
27
+
28
+
29
class FileCache:
    """Disk cache for parsed ASTs, keyed by file path + content hash.

    Entries are pickled ``(ast_tree, content)`` tuples stored under
    ``cache_dir`` and expire after ``ttl_hours``.
    """

    def __init__(self, cache_dir: str = ".code2flow_cache", ttl_hours: int = 24):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl_seconds = ttl_hours * 3600

    def _get_cache_key(self, file_path: str, content: str) -> str:
        """Generate cache key from file path and content hash."""
        # md5 is fine here: the digest is a cache key, not a security boundary.
        content_hash = hashlib.md5(content.encode()).hexdigest()[:16]
        return f"{Path(file_path).stem}_{content_hash}"

    def _get_cache_path(self, cache_key: str) -> Path:
        """Get the on-disk path for a cache key."""
        return self.cache_dir / f"{cache_key}.pkl"

    def get(self, file_path: str, content: str) -> Optional[Tuple[ast.AST, str]]:
        """Return the cached (ast, content) tuple, or None if absent/expired/corrupt."""
        cache_key = self._get_cache_key(file_path, content)
        cache_path = self._get_cache_path(cache_key)

        # Fix: the old exists()/stat() pair raced with concurrent deletion;
        # stat() inside try covers both "missing" and "vanished meanwhile".
        try:
            age = time.time() - cache_path.stat().st_mtime
        except OSError:
            return None

        if age > self.ttl_seconds:
            # missing_ok: another process may have evicted the entry already.
            cache_path.unlink(missing_ok=True)
            return None

        try:
            with open(cache_path, 'rb') as f:
                return pickle.load(f)
        except Exception:
            # Corrupt or unreadable entry: treat as a cache miss.
            return None

    def put(self, file_path: str, content: str, data: Tuple[ast.AST, str]) -> None:
        """Store (ast, content) in the cache; failures are silently ignored."""
        cache_key = self._get_cache_key(file_path, content)
        cache_path = self._get_cache_path(cache_key)

        try:
            with open(cache_path, 'wb') as f:
                pickle.dump(data, f)
        except Exception:
            # Caching is best-effort; never fail analysis over a cache write.
            pass

    def clear(self) -> None:
        """Remove all cached entries."""
        for f in self.cache_dir.glob("*.pkl"):
            f.unlink(missing_ok=True)
81
+
82
+
83
class FastFileFilter:
    """Fast file filtering with pattern matching."""

    def __init__(self, config: FilterConfig):
        self.config = config
        # Lower-case all patterns once so every later match is case-insensitive.
        self._exclude_patterns = [pat.lower() for pat in config.exclude_patterns]
        self._include_patterns = [pat.lower() for pat in config.include_patterns]

    def should_process(self, file_path: str) -> bool:
        """Return True when the path passes the exclude/include pattern sets."""
        lowered = file_path.lower()

        def matches(pattern: str) -> bool:
            # A pattern hits on a glob match or a plain substring occurrence.
            return fnmatch.fnmatch(lowered, pattern) or pattern in lowered

        if any(matches(pat) for pat in self._exclude_patterns):
            return False

        # With include patterns configured, at least one must match.
        if self._include_patterns:
            return any(matches(pat) for pat in self._include_patterns)

        return True

    def should_skip_function(self, name: str, line_count: int, is_private: bool = False,
                             is_property: bool = False, is_accessor: bool = False) -> bool:
        """Return True when configuration says this function is not worth analyzing."""
        cfg = self.config
        return (
            line_count < cfg.min_function_lines
            or (cfg.skip_private and is_private)
            or (cfg.skip_properties and is_property)
            or (cfg.skip_accessors and is_accessor)
        )
121
+
122
+
123
class FileAnalyzer:
    """Analyzes a single file."""

    def __init__(self, config: Config, cache: Optional[FileCache] = None):
        self.config = config
        self.cache = cache
        # Running counters, updated as files/functions/classes are processed.
        self.stats = dict.fromkeys(
            ('files_processed', 'functions_found', 'classes_found',
             'nodes_created', 'cache_hits'),
            0,
        )
136
+
137
+ def analyze_file(self, file_path: str, module_name: str) -> Dict:
138
+ """Analyze a single Python file."""
139
+ path = Path(file_path)
140
+ if not path.exists():
141
+ return {}
142
+
143
+ try:
144
+ content = path.read_text(encoding='utf-8', errors='ignore')
145
+ except Exception:
146
+ return {}
147
+
148
+ # Try cache
149
+ if self.cache and self.config.performance.enable_cache:
150
+ cached = self.cache.get(file_path, content)
151
+ if cached:
152
+ self.stats['cache_hits'] += 1
153
+ ast_tree, _ = cached
154
+ else:
155
+ try:
156
+ ast_tree = ast.parse(content)
157
+ self.cache.put(file_path, content, (ast_tree, content))
158
+ except SyntaxError:
159
+ return {}
160
+ else:
161
+ try:
162
+ ast_tree = ast.parse(content)
163
+ except SyntaxError:
164
+ return {}
165
+
166
+ result = self._analyze_ast(ast_tree, file_path, module_name, content)
167
+ self.stats['files_processed'] += 1
168
+ return result
169
+
170
+ def _analyze_ast(self, tree: ast.AST, file_path: str, module_name: str, content: str) -> Dict:
171
+ """Analyze AST and extract structure."""
172
+ result = {
173
+ 'module': ModuleInfo(
174
+ name=module_name,
175
+ file=file_path,
176
+ is_package=Path(file_path).name == '__init__.py'
177
+ ),
178
+ 'functions': {},
179
+ 'classes': {},
180
+ 'nodes': {},
181
+ 'edges': [],
182
+ }
183
+
184
+ lines = content.split('\n')
185
+
186
+ for node in tree.body:
187
+ if isinstance(node, ast.ClassDef):
188
+ self._process_class(node, file_path, module_name, result, lines)
189
+ elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
190
+ self._process_function(node, file_path, module_name, result, lines, None)
191
+
192
+ # Calculate complexity with radon
193
+ try:
194
+ complexity_results = cc_visit(content)
195
+ for entry in complexity_results:
196
+ # Radon returns a list of objects (Function, Class, Method)
197
+ name = getattr(entry, 'name', '')
198
+ classname = getattr(entry, 'classname', None)
199
+
200
+ if classname:
201
+ full_name = f"{module_name}.{classname}.{name}"
202
+ else:
203
+ full_name = f"{module_name}.{name}"
204
+
205
+ if full_name in result['functions']:
206
+ result['functions'][full_name].complexity = {
207
+ 'cyclomatic_complexity': entry.complexity,
208
+ 'cc_rank': cc_rank(entry.complexity)
209
+ }
210
+ elif full_name in result['classes']:
211
+ # We can store class complexity too if needed
212
+ result['classes'][full_name].is_state_machine = result['classes'][full_name].is_state_machine or (entry.complexity > 20)
213
+ except Exception as e:
214
+ if self.config.verbose:
215
+ print(f"Error calculating complexity for {file_path}: {e}")
216
+
217
+ # New: Deep Analysis for refactoring
218
+ try:
219
+ dfg_ext = DFGExtractor(self.config)
220
+ dfg_res = dfg_ext.extract(tree, module_name, file_path)
221
+ result['mutations'] = dfg_res.mutations
222
+ result['data_flows'] = dfg_res.data_flows
223
+
224
+ # Update function calls from CG extractor which is more robust
225
+ cg_ext = CallGraphExtractor(self.config)
226
+ cg_res = cg_ext.extract(tree, module_name, file_path)
227
+ for func_name, cg_func in cg_res.functions.items():
228
+ if func_name in result['functions']:
229
+ result['functions'][func_name].calls.extend(list(cg_func.calls))
230
+ except Exception as e:
231
+ if self.config.verbose:
232
+ print(f"Error in deep analysis for {file_path}: {e}")
233
+
234
+ self.stats['files_processed'] += 1
235
+ return result
236
+
237
+ def _process_class(self, node: ast.ClassDef, file_path: str, module_name: str,
238
+ result: Dict, lines: List[str]) -> None:
239
+ """Process class definition."""
240
+ class_name = node.name
241
+ qualified_name = f"{module_name}.{class_name}"
242
+
243
+ methods = []
244
+ for item in node.body:
245
+ if isinstance(item, ast.FunctionDef) or isinstance(item, ast.AsyncFunctionDef):
246
+ method_name = item.name
247
+ qualified_method = f"{qualified_name}.{method_name}"
248
+ methods.append(qualified_method)
249
+ self._process_function(item, file_path, module_name, result, lines, class_name)
250
+
251
+ result['classes'][qualified_name] = ClassInfo(
252
+ name=class_name,
253
+ qualified_name=qualified_name,
254
+ file=file_path,
255
+ line=node.lineno,
256
+ module=module_name,
257
+ bases=[self._get_base_name(b) for b in node.bases],
258
+ methods=methods,
259
+ docstring=ast.get_docstring(node),
260
+ )
261
+ result['module'].classes.append(qualified_name)
262
+ self.stats['classes_found'] += 1
263
+
264
+ def _process_function(self, node: ast.FunctionDef, file_path: str, module_name: str,
265
+ result: Dict, lines: List[str], class_name: Optional[str]) -> None:
266
+ """Process function definition with limited CFG depth."""
267
+ func_name = node.name
268
+ if class_name:
269
+ qualified_name = f"{module_name}.{class_name}.{func_name}"
270
+ else:
271
+ qualified_name = f"{module_name}.{func_name}"
272
+
273
+ # Check filtering - use FastFileFilter for function-level filtering
274
+ line_count = (node.end_lineno - node.lineno + 1) if node.end_lineno else 1
275
+ is_private = func_name.startswith('_')
276
+ is_property = any(
277
+ isinstance(d, ast.Name) and d.id == 'property'
278
+ for d in node.decorator_list
279
+ )
280
+
281
+ filter_obj = FastFileFilter(self.config.filters)
282
+ if filter_obj.should_skip_function(func_name, line_count, is_private, is_property):
283
+ return
284
+
285
+ # Create function info
286
+ func_info = FunctionInfo(
287
+ name=func_name,
288
+ qualified_name=qualified_name,
289
+ file=file_path,
290
+ line=node.lineno,
291
+ column=node.col_offset,
292
+ module=module_name,
293
+ class_name=class_name,
294
+ is_method=class_name is not None,
295
+ is_private=is_private,
296
+ is_property=is_property,
297
+ docstring=ast.get_docstring(node),
298
+ args=[arg.arg for arg in node.args.args],
299
+ decorators=[self._get_decorator_name(d) for d in node.decorator_list],
300
+ )
301
+
302
+ # Build simplified CFG with depth limit
303
+ if not self.config.performance.skip_data_flow:
304
+ self._build_cfg(node, qualified_name, func_info, result)
305
+
306
+ # Find calls
307
+ for child in ast.walk(node):
308
+ if isinstance(child, ast.Call):
309
+ called_name = self._get_call_name(child.func)
310
+ if called_name:
311
+ func_info.calls.append(called_name)
312
+
313
+ result['functions'][qualified_name] = func_info
314
+ result['module'].functions.append(qualified_name)
315
+ self.stats['functions_found'] += 1
316
+
317
+ def _build_cfg(self, node: ast.FunctionDef, func_name: str,
318
+ func_info: FunctionInfo, result: Dict) -> None:
319
+ """Build simplified control flow graph with depth limit."""
320
+ max_depth = self.config.depth.max_cfg_depth
321
+
322
+ entry_id = f"{func_name}_entry"
323
+ exit_id = f"{func_name}_exit"
324
+
325
+ # Create entry/exit nodes
326
+ result['nodes'][entry_id] = FlowNode(
327
+ id=entry_id, type='ENTRY', label='entry', function=func_name
328
+ )
329
+ result['nodes'][exit_id] = FlowNode(
330
+ id=exit_id, type='EXIT', label='exit', function=func_name
331
+ )
332
+
333
+ func_info.cfg_nodes.extend([entry_id, exit_id])
334
+
335
+ func_info.cfg_entry = entry_id
336
+ func_info.cfg_exit = exit_id
337
+
338
+ # Build CFG with depth limiting
339
+ self._process_cfg_block(node.body, entry_id, exit_id, func_name,
340
+ func_info, result, depth=0, max_depth=max_depth)
341
+
342
+ self.stats['nodes_created'] += len(result['nodes'])
343
+
344
    def _process_cfg_block(self, body: List[ast.stmt], entry: str, exit: str,
                           func_name: str, func_info: FunctionInfo, result: Dict, depth: int, max_depth: int) -> str:
        """Process a block of statements for CFG with depth limiting.

        Creates IF/FOR/WHILE/RETURN nodes for the statements in ``body`` and
        wires edges from ``entry`` toward ``exit``.

        NOTE(review): every code path returns ``exit`` (never a merge node),
        so recursive calls always report ``exit`` as their last node and the
        merge edges below effectively connect ``exit`` -> merge. Confirm this
        is the intended shape of the simplified CFG.
        """
        if depth >= max_depth:
            # Connect directly to exit if depth exceeded
            result['edges'].append(FlowEdge(source=entry, target=exit))
            return exit

        current = entry
        for stmt in body:
            if isinstance(stmt, ast.If):
                # Create branch node
                node_id = f"{func_name}_if_{stmt.lineno}"
                result['nodes'][node_id] = FlowNode(
                    id=node_id, type='IF', label='if', function=func_name,
                    line=stmt.lineno
                )
                func_info.cfg_nodes.append(node_id)
                result['edges'].append(FlowEdge(source=current, target=node_id))

                # Process branches (each consumes one extra depth level)
                then_exit = self._process_cfg_block(
                    stmt.body, node_id, exit, func_name, func_info, result, depth + 1, max_depth
                )
                if stmt.orelse:
                    else_exit = self._process_cfg_block(
                        stmt.orelse, node_id, exit, func_name, func_info, result, depth + 1, max_depth
                    )
                else:
                    # No else branch: control falls through from the IF node.
                    else_exit = node_id

                # Merge point after the conditional
                current = f"{func_name}_merge_{stmt.lineno}"
                result['nodes'][current] = FlowNode(
                    id=current, type='FUNC', label='merge', function=func_name
                )
                func_info.cfg_nodes.append(current)
                result['edges'].append(FlowEdge(source=then_exit, target=current))
                if else_exit != node_id:
                    result['edges'].append(FlowEdge(source=else_exit, target=current))

            elif isinstance(stmt, (ast.For, ast.While)):
                node_id = f"{func_name}_loop_{stmt.lineno}"
                loop_type = 'FOR' if isinstance(stmt, ast.For) else 'WHILE'
                result['nodes'][node_id] = FlowNode(
                    id=node_id, type=loop_type, label=loop_type.lower(),
                    function=func_name, line=stmt.lineno
                )
                func_info.cfg_nodes.append(node_id)
                result['edges'].append(FlowEdge(source=current, target=node_id))

                # Limit loop body depth even more (depth + 2). The body's exit
                # is the loop node itself, modeling the back edge.
                self._process_cfg_block(
                    stmt.body, node_id, node_id, func_name, func_info, result, depth + 2, max_depth
                )
                current = node_id

            elif isinstance(stmt, ast.Return):
                node_id = f"{func_name}_return_{stmt.lineno}"
                result['nodes'][node_id] = FlowNode(
                    id=node_id, type='RETURN', label='return',
                    function=func_name, line=stmt.lineno
                )
                func_info.cfg_nodes.append(node_id)
                result['edges'].append(FlowEdge(source=current, target=node_id))
                result['edges'].append(FlowEdge(source=node_id, target=exit))
                # Statements after an unconditional return are unreachable.
                return exit

        # Fall-through from the last statement to the block's exit.
        if current != exit:
            result['edges'].append(FlowEdge(source=current, target=exit))

        return exit
416
+
417
+ def _get_base_name(self, node: ast.expr) -> str:
418
+ """Extract base class name."""
419
+ if isinstance(node, ast.Name):
420
+ return node.id
421
+ elif isinstance(node, ast.Attribute):
422
+ return f"{self._get_base_name(node.value)}.{node.attr}"
423
+ return str(node)
424
+
425
+ def _get_decorator_name(self, node: ast.expr) -> str:
426
+ """Extract decorator name."""
427
+ if isinstance(node, ast.Name):
428
+ return node.id
429
+ elif isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
430
+ return node.func.id
431
+ return ""
432
+
433
+ def _get_call_name(self, node: ast.expr) -> Optional[str]:
434
+ """Extract function name from call."""
435
+ if isinstance(node, ast.Name):
436
+ return node.id
437
+ elif isinstance(node, ast.Attribute):
438
+ return f"{self._get_call_name(node.value)}.{node.attr}"
439
+ return None
440
+
441
+
442
def _analyze_single_file(args):
    """Worker entry point for ProcessPoolExecutor (module-level so it pickles).

    ``args`` is a ``(file_path, module_name, config_dict)`` tuple; the config
    dict is rehydrated into a Config inside the worker process.
    """
    file_path, module_name, config_kwargs = args
    from .config import Config
    analyzer = FileAnalyzer(Config(**config_kwargs), None)
    return analyzer.analyze_file(file_path, module_name)
449
+
450
+
451
class ProjectAnalyzer:
    """Main analyzer with parallel processing."""

    def __init__(self, config: Optional[Config] = None):
        self.config = config or FAST_CONFIG
        perf = self.config.performance
        # Only materialize an on-disk cache when the config asks for one.
        self.cache = (
            FileCache(perf.cache_dir, perf.cache_ttl_hours)
            if perf.enable_cache
            else None
        )
        self.file_filter = FastFileFilter(self.config.filters)
461
+
462
    def analyze_project(self, project_path: str) -> AnalysisResult:
        """Analyze entire project.

        Pipeline: collect files -> analyze (parallel or sequential) -> merge
        per-file results -> call graph -> pattern detection -> refactoring
        analysis -> attach summary stats.

        Raises:
            FileNotFoundError: if ``project_path`` does not exist.
        """
        start_time = time.time()

        project_path = Path(project_path).resolve()
        if not project_path.exists():
            raise FileNotFoundError(f"Project path does not exist: {project_path}")

        # Collect Python files
        files = self._collect_files(project_path)

        if self.config.verbose:
            print(f"Found {len(files)} files to analyze")

        # Analyze files
        if self.config.performance.parallel_enabled and len(files) > 1:
            results = self._analyze_parallel(files)
        else:
            results = self._analyze_sequential(files)

        # Merge results
        merged = self._merge_results(results, str(project_path))

        # Build call graph
        self._build_call_graph(merged)

        if not self.config.performance.skip_pattern_detection:
            self._detect_patterns(merged)

        # New: Refactoring analysis
        self._perform_refactoring_analysis(merged)

        # Calculate stats
        elapsed = time.time() - start_time
        merged.stats = {
            # NOTE(review): this counts files *collected*, not files that
            # produced a non-empty result — confirm that is intended.
            'files_processed': len(files),
            'functions_found': len(merged.functions),
            'classes_found': len(merged.classes),
            'nodes_created': len(merged.nodes),
            'edges_created': len(merged.edges),
            'patterns_detected': len(merged.patterns),
            'analysis_time_seconds': round(elapsed, 2),
            # NOTE(review): per-file result dicts do not appear to carry a
            # 'cache_hits' key (FileAnalyzer keeps it in .stats), so this sum
            # looks like it is always 0 — verify against FileAnalyzer.
            'cache_hits': sum(r.get('cache_hits', 0) for r in results),
        }

        if self.config.verbose:
            print(f"Analysis complete in {elapsed:.2f}s")
            print(f" Functions: {len(merged.functions)}")
            print(f" Classes: {len(merged.classes)}")
            print(f" CFG Nodes: {len(merged.nodes)}")
            print(f" Patterns: {len(merged.patterns)}")

        return merged
515
+
516
+ def _collect_files(self, project_path: Path) -> List[Tuple[str, str]]:
517
+ """Collect all Python files with their module names."""
518
+ files = []
519
+
520
+ for py_file in project_path.rglob("*.py"):
521
+ file_str = str(py_file)
522
+ if not self.file_filter.should_process(file_str):
523
+ continue
524
+
525
+ # Calculate module name
526
+ rel_path = py_file.relative_to(project_path)
527
+ parts = list(rel_path.parts)[:-1] # Remove .py
528
+ if py_file.name == '__init__.py':
529
+ module_name = '.'.join(parts) if parts else project_path.name
530
+ else:
531
+ module_name = '.'.join(parts + [py_file.stem])
532
+
533
+ files.append((file_str, module_name))
534
+
535
+ return files
536
+
537
    def _analyze_parallel(self, files: List[Tuple[str, str]]) -> List[Dict]:
        """Analyze files in parallel."""
        results = []
        workers = min(self.config.performance.parallel_workers, len(files))

        # Convert config to dict for pickle compatibility
        # NOTE(review): only this subset of Config fields is forwarded, so
        # worker processes run with defaults for everything else (filters,
        # performance, depth, verbose) — confirm this is intentional.
        config_dict = {
            'mode': self.config.mode,
            'max_depth_enumeration': self.config.max_depth_enumeration,
            'detect_state_machines': self.config.detect_state_machines,
            'detect_recursion': self.config.detect_recursion,
            'output_dir': self.config.output_dir,
        }

        # Prepare args with config dict
        args_list = [(f[0], f[1], config_dict) for f in files]

        with ProcessPoolExecutor(max_workers=workers) as executor:
            # Map each future back to its argument tuple for error reporting.
            futures = {executor.submit(_analyze_single_file, a): a for a in args_list}

            for future in as_completed(futures):
                try:
                    result = future.result()
                    if result:
                        results.append(result)
                except Exception as e:
                    # Worker failures are logged (verbose) and skipped, never fatal.
                    if self.config.verbose:
                        print(f"Error analyzing {futures[future]}: {e}")

        return results
567
+
568
+ def _analyze_sequential(self, files: List[Tuple[str, str]]) -> List[Dict]:
569
+ """Analyze files sequentially."""
570
+ results = []
571
+ analyzer = FileAnalyzer(self.config, self.cache)
572
+
573
+ for file_path, module_name in files:
574
+ result = analyzer.analyze_file(file_path, module_name)
575
+ if result:
576
+ results.append(result)
577
+
578
+ return results
579
+
580
+ def _merge_results(self, results: List[Dict], project_path: str) -> AnalysisResult:
581
+ """Merge all file analysis results."""
582
+ merged = AnalysisResult(
583
+ project_path=project_path,
584
+ analysis_mode=self.config.mode,
585
+ )
586
+
587
+ for r in results:
588
+ if 'module' in r:
589
+ mod = r['module']
590
+ merged.modules[mod.name] = mod
591
+ if 'functions' in r:
592
+ merged.functions.update(r['functions'])
593
+ if 'classes' in r:
594
+ merged.classes.update(r['classes'])
595
+ if 'nodes' in r:
596
+ merged.nodes.update(r['nodes'])
597
+ if 'edges' in r:
598
+ merged.edges.extend(r['edges'])
599
+ if 'mutations' in r:
600
+ merged.mutations.extend(r['mutations'])
601
+ if 'data_flows' in r:
602
+ merged.data_flows.update(r['data_flows'])
603
+
604
+ return merged
605
+
606
+ def _build_call_graph(self, result: AnalysisResult) -> None:
607
+ """Build call graph and find entry points."""
608
+ # Map calls between functions
609
+ for func_name, func in result.functions.items():
610
+ for called in func.calls:
611
+ # Try to resolve to a known function
612
+ for known_name in result.functions:
613
+ if known_name.endswith(f".{called}") or known_name == called:
614
+ func.calls[func.calls.index(called)] = known_name
615
+ result.functions[known_name].called_by.append(func_name)
616
+ break
617
+
618
+ # Find entry points (not called by anything)
619
+ for func_name, func in result.functions.items():
620
+ if not func.called_by:
621
+ result.entry_points.append(func_name)
622
+
623
+ def _detect_patterns(self, result: AnalysisResult) -> None:
624
+ """Detect behavioral patterns."""
625
+ # Detect recursion
626
+ for func_name, func in result.functions.items():
627
+ if func_name in func.calls:
628
+ result.patterns.append(Pattern(
629
+ name=f"recursion_{func.name}",
630
+ type="recursion",
631
+ confidence=0.9,
632
+ functions=[func_name],
633
+ entry_points=[func_name],
634
+ ))
635
+
636
+ # Detect state machines (simple heuristic)
637
+ for class_name, cls in result.classes.items():
638
+ state_methods = [m for m in cls.methods if any(
639
+ s in m.lower() for s in ['state', 'transition', 'enter', 'exit', 'connect', 'disconnect']
640
+ )]
641
+ if len(state_methods) >= 2:
642
+ cls.is_state_machine = True
643
+ result.patterns.append(Pattern(
644
+ name=f"state_machine_{cls.name}",
645
+ type="state_machine",
646
+ confidence=0.7,
647
+ functions=cls.methods,
648
+ entry_points=cls.methods[:1],
649
+ ))
650
+
651
    def _perform_refactoring_analysis(self, result: AnalysisResult) -> None:
        """Perform deep analysis and detect code smells."""
        if self.config.verbose:
            print("Performing refactoring analysis...")

        # 1. Calculate metrics (fan-in/fan-out)
        # NOTE(review): reaches into CallGraphExtractor internals (assigns
        # .result and calls the private _calculate_metrics) — confirm the
        # extractor is designed for this usage.
        cg_ext = CallGraphExtractor(self.config)
        cg_ext.result = result
        cg_ext._calculate_metrics()

        # 2. Build networkx graph for project-level analysis
        G = nx.DiGraph()
        for func_name, func_info in result.functions.items():
            G.add_node(func_name)
            for callee in func_info.calls:
                G.add_edge(func_name, callee)

        # 3. Calculate Betweenness Centrality (Bottlenecks)
        if len(G) > 0:
            try:
                centrality = nx.betweenness_centrality(G)
                for func_name, score in centrality.items():
                    # Callees that are not known functions appear as graph
                    # nodes too; only score the ones we track.
                    if func_name in result.functions:
                        result.functions[func_name].centrality = score
            except Exception as e:
                if self.config.verbose:
                    print(f"Error calculating centrality: {e}")

        # 4. Detect Circular Dependencies
        try:
            # simple_cycles can be expensive on dense graphs; guarded by try.
            cycles = list(nx.simple_cycles(G))
            if cycles:
                result.metrics["project"] = result.metrics.get("project", {})
                result.metrics["project"]["circular_dependencies"] = cycles
        except Exception as e:
            if self.config.verbose:
                print(f"Error detecting cycles: {e}")

        # 5. Community Detection (Module groups)
        try:
            from networkx.algorithms import community
            # Using Louvain if available, otherwise greedy modularity
            if hasattr(community, 'louvain_communities'):
                communities = community.louvain_communities(G.to_undirected())
            else:
                communities = community.greedy_modularity_communities(G.to_undirected())

            result.coupling["communities"] = [list(c) for c in communities]
        except Exception as e:
            if self.config.verbose:
                print(f"Error in community detection: {e}")

        # 6. Analyze coupling
        coupling_analyzer = CouplingAnalyzer(result)
        coupling_analyzer.analyze()

        # 7. Detect code smells
        smell_detector = SmellDetector(result)
        smell_detector.detect()

        # 8. Dead code detection with vulture
        self._detect_dead_code(result)

        if self.config.verbose:
            print(f" Detected {len(result.smells)} code smells")
716
+
717
+ def _detect_dead_code(self, result: AnalysisResult) -> None:
718
+ """Use vulture to find dead code and update reachability."""
719
+ if self.config.verbose:
720
+ print("Detecting dead code with vulture...")
721
+
722
+ try:
723
+ v = vulture.Vulture(verbose=False)
724
+
725
+ # vulture.scan takes the code content as a string
726
+ for py_file in Path(result.project_path).rglob("*.py"):
727
+ if not self.file_filter.should_process(str(py_file)):
728
+ continue
729
+ try:
730
+ content = py_file.read_text(encoding='utf-8', errors='ignore')
731
+ v.scan(content, filename=str(py_file))
732
+ except Exception:
733
+ continue
734
+
735
+ dead_code = v.get_unused_code()
736
+
737
+ if self.config.verbose:
738
+ print(f" Vulture found {len(dead_code)} unused items")
739
+
740
+ # Map unused code to our functions/classes
741
+ for item in dead_code:
742
+ if self.config.verbose:
743
+ item_lineno = getattr(item, 'lineno', getattr(item, 'first_lineno', 0))
744
+ print(f" Vulture item: {item.filename}:{item_lineno} ({item.typ})")
745
+
746
+ # Match by file and line
747
+ item_path = Path(item.filename).resolve()
748
+ item_lineno = getattr(item, 'lineno', getattr(item, 'first_lineno', 0))
749
+ for func_name, func_info in result.functions.items():
750
+ func_path = Path(func_info.file).resolve()
751
+ if func_path == item_path and func_info.line == item_lineno:
752
+ func_info.reachability = "unreachable"
753
+
754
+ for class_name, class_info in result.classes.items():
755
+ if Path(class_info.file).resolve() == Path(item.filename).resolve() and class_info.line == item.lineno:
756
+ class_info.reachability = "unreachable" # (if we add reachability to ClassInfo too)
757
+
758
+ # Mark others as reachable if they are NOT orphans
759
+ for func_name, func_info in result.functions.items():
760
+ if func_info.reachability == "unknown":
761
+ if func_info.called_by or func_name in result.entry_points:
762
+ func_info.reachability = "reachable"
763
+ except Exception as e:
764
+ if self.config.verbose:
765
+ print(f"Error in dead code detection: {e}")