code2flow-toon 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +17 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/smells.py +192 -0
  10. code2flow/cli.py +464 -0
  11. code2flow/core/__init__.py +36 -0
  12. code2flow/core/analyzer.py +765 -0
  13. code2flow/core/config.py +177 -0
  14. code2flow/core/models.py +194 -0
  15. code2flow/core/streaming_analyzer.py +666 -0
  16. code2flow/exporters/__init__.py +17 -0
  17. code2flow/exporters/base.py +13 -0
  18. code2flow/exporters/json_exporter.py +17 -0
  19. code2flow/exporters/llm_exporter.py +199 -0
  20. code2flow/exporters/mermaid_exporter.py +67 -0
  21. code2flow/exporters/toon.py +401 -0
  22. code2flow/exporters/yaml_exporter.py +108 -0
  23. code2flow/llm_flow_generator.py +451 -0
  24. code2flow/llm_task_generator.py +263 -0
  25. code2flow/mermaid_generator.py +481 -0
  26. code2flow/nlp/__init__.py +23 -0
  27. code2flow/nlp/config.py +174 -0
  28. code2flow/nlp/entity_resolution.py +326 -0
  29. code2flow/nlp/intent_matching.py +297 -0
  30. code2flow/nlp/normalization.py +122 -0
  31. code2flow/nlp/pipeline.py +388 -0
  32. code2flow/patterns/__init__.py +0 -0
  33. code2flow/patterns/detector.py +168 -0
  34. code2flow/refactor/__init__.py +0 -0
  35. code2flow/refactor/prompt_engine.py +150 -0
  36. code2flow/visualizers/__init__.py +0 -0
  37. code2flow/visualizers/graph.py +196 -0
  38. code2flow_toon-0.2.4.dist-info/METADATA +599 -0
  39. code2flow_toon-0.2.4.dist-info/RECORD +43 -0
  40. code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
  41. code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
  42. code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
  43. code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,401 @@
1
+ """Toon Exporter - Optimized compact format for code2flow."""
2
+
3
+ import yaml
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ from typing import Any, Dict
7
+
8
+ from ..core.models import AnalysisResult
9
+
10
+
11
class ToonExporter:
    """Export analysis results to the compact "toon" YAML format.

    Each function is scored with a weighted complexity model, functions are
    grouped into class- and module-level summaries, patterns and a call graph
    are extracted, and the whole structure is sorted so the most complex /
    most important items appear first before being dumped as flow-style YAML.
    """

    def __init__(self):
        # Weighted contribution of each CFG node type to a function's
        # complexity score: loops dominate, conditions and calls count fully,
        # assignments and returns count half. Unknown types contribute 0.
        self.complexity_weights = {
            'FOR': 2, 'WHILE': 2,
            'IF': 1,
            'method_call': 1,
            'assign': 0.5,
            'RETURN': 0.5,
        }

    def export(self, result: AnalysisResult, output_path: str, **kwargs) -> None:
        """Build the toon structure for *result* and write it to *output_path*.

        Parent directories are created as needed. Extra keyword arguments are
        accepted (and ignored) for interface compatibility with other exporters.
        """
        toon_data = self._build_toon_data(result)
        toon_data = self._optimize_structure(toon_data)

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            # Flow style plus a wide line width keeps the output compact
            # while remaining readable; insertion order is preserved.
            yaml.dump(toon_data, f,
                      default_flow_style=True,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)

    def _build_toon_data(self, result: AnalysisResult) -> Dict[str, Any]:
        """Assemble the complete toon dictionary from an analysis result."""
        toon_data = {
            'meta': {
                'project': result.project_path,
                'mode': result.analysis_mode,
                'generated': self._get_timestamp(),
                'version': '2.0',
            },
            'stats': result.stats,
            'functions': [],
            'classes': [],
            'modules': [],
            'patterns': [],
            'call_graph': [],
            'insights': {},
        }

        # Per-function complexity analysis feeds the class/module rollups.
        function_analysis = self._analyze_functions(result)
        toon_data['functions'] = function_analysis['functions']

        toon_data['classes'] = self._build_classes(function_analysis['functions'])
        toon_data['modules'] = self._build_modules(function_analysis['functions'])

        toon_data['patterns'] = self._extract_patterns(result)
        toon_data['call_graph'] = self._build_call_graph(result)

        toon_data['insights'] = self._generate_insights(toon_data)

        return toon_data

    def _analyze_functions(self, result: AnalysisResult) -> Dict[str, Any]:
        """Score every function and return the detailed entries plus a tier distribution.

        Functions scoring below 3.0 are counted in the tier distribution but
        omitted from the detailed ``functions`` list (and therefore from any
        downstream averages over that list).
        """
        function_nodes = defaultdict(list)

        # Group CFG nodes by the function they belong to.
        for node_id, node_data in result.nodes.items():
            func_name = getattr(node_data, 'function', '')
            if func_name:
                function_nodes[func_name].append(node_data)

        functions = []
        complexity_distribution = defaultdict(int)

        for func_name, nodes in function_nodes.items():
            complexity_score = self._calculate_complexity(nodes)
            complexity_distribution[self._get_complexity_tier(complexity_score)] += 1

            # Low-complexity functions are kept in the distribution above but
            # get no detailed entry, which keeps the output focused.
            if complexity_score < 3.0:
                continue

            traits = self._extract_traits(nodes)

            func_entry = {
                # Split a dotted qualified name into short name + module path.
                'name': func_name.split('.')[-1] if '.' in func_name else func_name,
                'module': func_name.rsplit('.', 1)[0] if '.' in func_name else 'root',
                'complexity': complexity_score,
                'tier': self._get_complexity_tier(complexity_score),
                'nodes': len(nodes),
                'traits': traits,
                'exits': len([n for n in nodes if getattr(n, 'type', '') in ['EXIT', 'RETURN']]),
            }

            functions.append(func_entry)

        return {
            'functions': functions,
            'complexity_distribution': dict(complexity_distribution),
        }

    def _calculate_complexity(self, nodes) -> float:
        """Return the weighted complexity score for a function's node list."""
        complexity = 0.0

        for node in nodes:
            node_type = getattr(node, 'type', 'FUNC')
            complexity += self.complexity_weights.get(node_type, 0)

        # Large functions get a flat size penalty on top of the node weights.
        if len(nodes) > 20:
            complexity += 2
        elif len(nodes) > 10:
            complexity += 1

        return round(complexity, 2)

    def _get_complexity_tier(self, score: float) -> str:
        """Map a numeric complexity score to a named tier."""
        if score >= 5:
            return 'critical'
        elif score >= 3:
            return 'high'
        elif score >= 1.5:
            return 'medium'
        elif score > 0:
            return 'low'
        else:
            return 'basic'

    def _extract_traits(self, nodes) -> list:
        """Summarize which node kinds a function contains as a compact trait list."""
        node_types = set(getattr(node, 'type', 'FUNC') for node in nodes)
        traits = []

        if any(t in ['FOR', 'WHILE'] for t in node_types):
            traits.append('loops')
        if 'IF' in node_types:
            traits.append('conditions')
        if 'RETURN' in node_types:
            traits.append('returns')
        if 'assign' in node_types:
            traits.append('assigns')
        if 'method_call' in node_types:
            traits.append('calls')

        return traits

    def _build_classes(self, functions: list) -> list:
        """Roll function entries up into per-class summaries.

        A function is attributed to a class when the last dotted segment of its
        module path starts with an uppercase letter (``pkg.ClassName`` pattern).
        """
        class_map = {}

        for func in functions:
            module_parts = func['module'].split('.')

            # [:1] instead of [0] so an empty trailing segment (e.g. a module
            # string ending in '.') cannot raise IndexError.
            if len(module_parts) >= 2 and module_parts[-1][:1].isupper():
                class_name = module_parts[-1]
                module_name = '.'.join(module_parts[:-1])

                class_key = f"{module_name}.{class_name}"

                if class_key not in class_map:
                    class_map[class_key] = {
                        'name': class_name,
                        'module': module_name,
                        'methods': [],
                        'method_count': 0,
                        'complexity_scores': [],
                        'total_nodes': 0,
                    }

                class_map[class_key]['methods'].append(func['name'])
                class_map[class_key]['method_count'] += 1
                class_map[class_key]['complexity_scores'].append(func['complexity'])
                class_map[class_key]['total_nodes'] += func['nodes']

        # Derive class-level metrics, then drop the raw score list.
        classes = []
        for class_data in class_map.values():
            if class_data['method_count'] > 0:
                class_data['avg_complexity'] = round(
                    sum(class_data['complexity_scores']) / len(class_data['complexity_scores']), 2
                )
                class_data['max_complexity'] = max(class_data['complexity_scores'])
                class_data['complex_methods'] = len([
                    c for c in class_data['complexity_scores'] if c >= 3
                ])

                del class_data['complexity_scores']
                classes.append(class_data)

        return classes

    def _build_modules(self, functions: list) -> list:
        """Roll function entries up into per-module summaries with tier counts."""
        module_map = {}

        for func in functions:
            module = func['module']

            if module not in module_map:
                module_map[module] = {
                    'name': module,
                    'functions': [],
                    'function_count': 0,
                    'complexity_scores': [],
                    'total_nodes': 0,
                    'tiers': defaultdict(int),
                }

            module_map[module]['functions'].append(func['name'])
            module_map[module]['function_count'] += 1
            module_map[module]['complexity_scores'].append(func['complexity'])
            module_map[module]['total_nodes'] += func['nodes']
            module_map[module]['tiers'][func['tier']] += 1

        # Derive module-level metrics, then drop the temporary fields.
        modules = []
        for module_data in module_map.values():
            if module_data['function_count'] > 0:
                scores = module_data['complexity_scores']
                module_data['avg_complexity'] = round(sum(scores) / len(scores), 2)
                module_data['max_complexity'] = max(scores)
                module_data['complex_functions'] = len([c for c in scores if c >= 3])
                module_data['critical_functions'] = len([c for c in scores if c >= 5])

                # Plain dict serializes more cleanly than defaultdict.
                module_data['complexity_distribution'] = dict(module_data['tiers'])
                del module_data['tiers']
                del module_data['complexity_scores']

                modules.append(module_data)

        return modules

    def _extract_patterns(self, result: AnalysisResult) -> list:
        """Collect code-smell pattern occurrences with a severity label each.

        Returns patterns sorted by occurrence count (descending).
        """
        pattern_counts = defaultdict(lambda: {'functions': [], 'count': 0})

        for node_id, node_data in result.nodes.items():
            node_type = getattr(node_data, 'type', 'FUNC')

            if node_type in ['god_function', 'feature_envy', 'shotgun_surgery', 'data_clump', 'state_machine']:
                func_name = getattr(node_data, 'function', '')
                if func_name:
                    pattern_counts[node_type]['functions'].append(func_name)
                    pattern_counts[node_type]['count'] += 1

        patterns = []
        severity_map = {
            'god_function': 'critical',
            'shotgun_surgery': 'high',
            'feature_envy': 'medium',
            'data_clump': 'medium',
            'state_machine': 'low',
        }

        # Guard the percentage denominator so an empty function table cannot
        # raise ZeroDivisionError.
        total_functions = len(result.functions) or 1

        for pattern_type, data in pattern_counts.items():
            if data['count'] > 0:
                patterns.append({
                    'type': pattern_type,
                    'severity': severity_map.get(pattern_type, 'unknown'),
                    'count': data['count'],
                    'functions': data['functions'][:10],  # cap list for readability
                    'percentage': round((data['count'] / total_functions) * 100, 1),
                })

        return sorted(patterns, key=lambda x: x['count'], reverse=True)

    def _build_call_graph(self, result: AnalysisResult) -> list:
        """Build a call graph of significant callers, ranked by importance."""
        call_counts = defaultdict(lambda: {'calls': [], 'call_count': 0, 'targets': set()})

        for edge in result.edges:
            source = getattr(edge, 'source', '')
            target = getattr(edge, 'target', '')

            if source and target:
                call_counts[source]['calls'].append(target)
                call_counts[source]['call_count'] += 1
                call_counts[source]['targets'].add(target)

        call_graph = []
        for caller, data in call_counts.items():
            # Single-call functions are noise; keep callers with >= 2 calls.
            if data['call_count'] >= 2:
                entry = {
                    'function': caller.split('.')[-1] if '.' in caller else caller,
                    'module': caller.rsplit('.', 1)[0] if '.' in caller else 'root',
                    'calls_count': data['call_count'],
                    'unique_targets': len(data['targets']),
                    'targets': list(data['targets'])[:5],  # top 5 targets only
                    'importance': self._calculate_importance(data['call_count'], len(data['targets'])),
                }
                call_graph.append(entry)

        return sorted(call_graph, key=lambda x: x['importance'], reverse=True)

    def _calculate_importance(self, call_count: int, target_count: int) -> float:
        """Score a caller: 70% call volume, 30% target diversity."""
        return round((call_count * 0.7 + target_count * 0.3), 2)

    def _generate_insights(self, toon_data: Dict[str, Any]) -> Dict[str, Any]:
        """Derive summary insights and recommendations from the built toon data.

        Note: averages here cover only the detailed function entries, i.e.
        functions with complexity >= 3.0 (see _analyze_functions).
        """
        functions = toon_data['functions']
        classes = toon_data['classes']
        modules = toon_data['modules']
        patterns = toon_data['patterns']

        insights = {
            'complexity_summary': {
                'critical_functions': len([f for f in functions if f['tier'] == 'critical']),
                'high_complexity': len([f for f in functions if f['tier'] == 'high']),
                # Guarded: with no detailed entries the average is defined as 0.0
                # instead of raising ZeroDivisionError.
                'avg_complexity': round(
                    sum(f['complexity'] for f in functions) / len(functions), 2
                ) if functions else 0.0,
            },
            'top_complex_modules': sorted(
                modules,
                key=lambda x: x['avg_complexity'],
                reverse=True
            )[:5],
            'pattern_summary': {
                'total_patterns': len(patterns),
                'critical_patterns': len([p for p in patterns if p['severity'] == 'critical']),
                'high_severity_patterns': len([p for p in patterns if p['severity'] == 'high']),
            },
            'recommendations': self._generate_recommendations(toon_data),
        }

        return insights

    def _generate_recommendations(self, toon_data: Dict[str, Any]) -> list:
        """Turn the analysis into a short list of actionable recommendations."""
        recommendations = []

        critical_funcs = len([f for f in toon_data['functions'] if f['tier'] == 'critical'])
        if critical_funcs > 0:
            recommendations.append({
                'type': 'complexity',
                'priority': 'high',
                'message': f"Refactor {critical_funcs} critical functions to improve maintainability"
            })

        critical_patterns = [p for p in toon_data['patterns'] if p['severity'] == 'critical']
        if critical_patterns:
            recommendations.append({
                'type': 'patterns',
                'priority': 'critical',
                'message': f"Address {len(critical_patterns)} critical code patterns"
            })

        complex_modules = [m for m in toon_data['modules'] if m['avg_complexity'] > 4]
        if complex_modules:
            recommendations.append({
                'type': 'modules',
                'priority': 'medium',
                'message': f"Consider splitting {len(complex_modules)} highly complex modules"
            })

        return recommendations

    def _optimize_structure(self, toon_data: Dict[str, Any]) -> Dict[str, Any]:
        """Sort all top-level lists most-significant-first and cap the call graph."""
        toon_data['functions'].sort(key=lambda x: (x['complexity'], x['module'], x['name']), reverse=True)
        toon_data['classes'].sort(key=lambda x: (x['method_count'], x['avg_complexity']), reverse=True)
        toon_data['modules'].sort(key=lambda x: (x['avg_complexity'], x['function_count']), reverse=True)

        # Keep only the 50 most important callers for readability.
        if len(toon_data['call_graph']) > 50:
            toon_data['call_graph'] = toon_data['call_graph'][:50]

        return toon_data

    def _get_timestamp(self) -> str:
        """Return the current local time as an ISO-8601 string."""
        from datetime import datetime
        return datetime.now().isoformat()
@@ -0,0 +1,108 @@
1
+ """YAML Exporter for code2flow."""
2
+
3
+ import yaml
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ from .base import Exporter
7
+ from ..core.models import AnalysisResult
8
+ from ..analysis.data_analysis import DataAnalyzer
9
+
10
+
11
class YAMLExporter(Exporter):
    """Export analysis results to YAML in several layouts.

    Supports a flat dump, CFG flows grouped by function, data-flow /
    data-structure analyses, reachable-vs-orphan separation, and a
    one-file-per-module split.
    """

    def __init__(self):
        # Shared analyzer backing the data-flow / data-structure exports.
        self.analyzer = DataAnalyzer()

    def export(self, result: AnalysisResult, output_path: str, compact: bool = True, include_defaults: bool = False) -> None:
        """Write the full analysis result to a single YAML file.

        ``include_defaults=True`` forces the verbose serialization even when
        ``compact`` is requested. Parent directories are created as needed.
        """
        data = result.to_dict(compact=compact and not include_defaults)
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    def export_grouped(self, result: AnalysisResult, output_path: str) -> None:
        """Write CFG nodes grouped per function as ordered flow sequences."""
        func_flows = defaultdict(list)
        for node_id, node in result.nodes.items():
            if hasattr(node, 'function') and node.function:
                func_flows[node.function].append({
                    'id': node_id,
                    'type': getattr(node, 'type', 'unknown'),
                    # Normalize a None label to '' so slicing below is safe.
                    'label': getattr(node, 'label', '') or '',
                    'line': getattr(node, 'line', None),
                })

        grouped_data = {
            'project': result.project_path,
            'summary': {'functions': len(result.functions), 'classes': len(result.classes)},
            'control_flows': {}
        }
        for func_name, nodes in sorted(func_flows.items()):
            # Single-node flows carry no sequencing information; skip them.
            if len(nodes) < 2:
                continue
            # Order by source line (missing lines sort first), then node id.
            sorted_nodes = sorted(nodes, key=lambda n: (n['line'] or 0, n['id']))
            grouped_data['control_flows'][func_name] = {
                'node_count': len(nodes),
                'flow_sequence': [
                    {'step': i + 1, 'type': n['type'], 'label': n['label'][:50], 'line': n['line']}
                    for i, n in enumerate(sorted_nodes)
                ]
            }

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(grouped_data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    def export_data_flow(self, result: AnalysisResult, output_path: str, compact: bool = True) -> None:
        """Write the data-flow analysis to YAML.

        ``compact`` is accepted for interface symmetry but currently unused.
        """
        flow_data = self.analyzer.analyze_data_flow(result)
        flow_data.update({'project_path': result.project_path, 'analysis_type': 'data_flow'})

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(flow_data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    def export_data_structures(self, result: AnalysisResult, output_path: str, compact: bool = True) -> None:
        """Write the data-structure analysis to YAML.

        ``compact`` is accepted for interface symmetry but currently unused.
        """
        structure_data = self.analyzer.analyze_data_structures(result)
        structure_data.update({'project_path': result.project_path, 'analysis_type': 'data_structures'})

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(structure_data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    def export_separated(self, result: AnalysisResult, output_dir: str, compact: bool = True) -> None:
        """Write reachable functions and orphans to two files in *output_dir*."""
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        connected = {}
        orphans = {}

        # Reachability is precomputed on FunctionInfo; entry points always
        # count as connected even if marked unreachable.
        for name, func in result.functions.items():
            if func.reachability == "reachable" or name in result.entry_points:
                connected[name] = func.to_dict(compact)
            else:
                orphans[name] = func.to_dict(compact)

        with open(output_path / 'consolidated.yaml', 'w', encoding='utf-8') as f:
            yaml.dump({'functions': connected}, f, default_flow_style=False, allow_unicode=True)

        with open(output_path / 'orphans.yaml', 'w', encoding='utf-8') as f:
            yaml.dump({'functions': orphans}, f, default_flow_style=False, allow_unicode=True)

    def export_split(self, result: AnalysisResult, output_dir: str, include_defaults: bool = False) -> None:
        """Write one YAML file per module, each with its functions and classes."""
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        modules = defaultdict(lambda: {'functions': {}, 'classes': {}})
        for name, func in result.functions.items():
            modules[func.module]['functions'][name] = func.to_dict(not include_defaults)
        for name, cls in result.classes.items():
            modules[cls.module]['classes'][name] = cls.to_dict(not include_defaults)

        for mod_name, content in modules.items():
            # Dots would create nested paths; the empty module maps to 'root'.
            safe_name = mod_name.replace('.', '_') or 'root'
            with open(output_path / f'{safe_name}.yaml', 'w', encoding='utf-8') as f:
                yaml.dump(content, f, default_flow_style=False, allow_unicode=True)