code2flow-toon 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +17 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/smells.py +192 -0
  10. code2flow/cli.py +464 -0
  11. code2flow/core/__init__.py +36 -0
  12. code2flow/core/analyzer.py +765 -0
  13. code2flow/core/config.py +177 -0
  14. code2flow/core/models.py +194 -0
  15. code2flow/core/streaming_analyzer.py +666 -0
  16. code2flow/exporters/__init__.py +17 -0
  17. code2flow/exporters/base.py +13 -0
  18. code2flow/exporters/json_exporter.py +17 -0
  19. code2flow/exporters/llm_exporter.py +199 -0
  20. code2flow/exporters/mermaid_exporter.py +67 -0
  21. code2flow/exporters/toon.py +401 -0
  22. code2flow/exporters/yaml_exporter.py +108 -0
  23. code2flow/llm_flow_generator.py +451 -0
  24. code2flow/llm_task_generator.py +263 -0
  25. code2flow/mermaid_generator.py +481 -0
  26. code2flow/nlp/__init__.py +23 -0
  27. code2flow/nlp/config.py +174 -0
  28. code2flow/nlp/entity_resolution.py +326 -0
  29. code2flow/nlp/intent_matching.py +297 -0
  30. code2flow/nlp/normalization.py +122 -0
  31. code2flow/nlp/pipeline.py +388 -0
  32. code2flow/patterns/__init__.py +0 -0
  33. code2flow/patterns/detector.py +168 -0
  34. code2flow/refactor/__init__.py +0 -0
  35. code2flow/refactor/prompt_engine.py +150 -0
  36. code2flow/visualizers/__init__.py +0 -0
  37. code2flow/visualizers/graph.py +196 -0
  38. code2flow_toon-0.2.4.dist-info/METADATA +599 -0
  39. code2flow_toon-0.2.4.dist-info/RECORD +43 -0
  40. code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
  41. code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
  42. code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
  43. code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,17 @@
1
+ """JSON Exporter for code2flow."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from .base import Exporter
6
+ from ..core.models import AnalysisResult
7
+
8
+
9
class JSONExporter(Exporter):
    """Export to JSON format."""

    def export(self, result: AnalysisResult, output_path: str, compact: bool = True, include_defaults: bool = False) -> None:
        """Serialize *result* to a JSON file at *output_path*.

        ``compact`` controls both the dict shape (via ``to_dict``) and the
        JSON indentation; ``include_defaults`` forces the non-compact dict
        even when ``compact`` is requested.  Parent directories are created
        as needed.
        """
        payload = result.to_dict(compact=compact and not include_defaults)
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open('w', encoding='utf-8') as fh:
            # Pretty-print only when a non-compact file was requested.
            json.dump(payload, fh, indent=None if compact else 2, ensure_ascii=False)
@@ -0,0 +1,199 @@
1
+ """LLM Prompt Exporter for code2flow - Refactored for lower complexity."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Dict, List, Tuple
5
+ from .base import Exporter
6
+ from ..core.models import AnalysisResult, FunctionInfo
7
+
8
+
9
class LLMPromptExporter(Exporter):
    """Export LLM-ready analysis summary with architecture and flows."""

    def export(self, result: AnalysisResult, output_path: str) -> None:
        """Generate comprehensive LLM prompt with architecture description.

        Assembles a markdown document section by section (each section is
        produced by a private ``_get_*`` helper returning a list of lines)
        and writes it to *output_path*, creating parent directories.
        """
        lines = ["# System Architecture Analysis", ""]

        # Collect sections
        lines.extend(self._get_overview(result))
        lines.extend(self._get_architecture_by_module(result))

        # Entry points are computed once and shared by three sections below.
        important_entries = self._get_important_entries(result)
        lines.extend(self._get_key_entry_points(important_entries))
        lines.extend(self._get_process_flows(result, important_entries))

        lines.extend(self._get_key_classes(result))
        lines.extend(self._get_data_transformations(result))
        lines.extend(self._get_behavioral_patterns(result))
        lines.extend(self._get_api_surface(result))
        lines.extend(self._get_system_interactions(important_entries))

        # Fixed instructional epilogue for the LLM consuming this report.
        lines.extend([
            "## Reverse Engineering Guidelines",
            "",
            "1. **Entry Points**: Start analysis from the entry points listed above",
            "2. **Core Logic**: Focus on classes with many methods",
            "3. **Data Flow**: Follow data transformation functions",
            "4. **Process Flows**: Use the flow diagrams for execution paths",
            "5. **API Surface**: Public API functions reveal the interface",
            "",
            "## Context for LLM",
            "",
            "Maintain the identified architectural patterns and public API surface when suggesting changes.",
        ])

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))

    def _get_overview(self, result: AnalysisResult) -> List[str]:
        """Return a markdown bullet list of top-level project counts."""
        return [
            "## Overview",
            "",
            f"- **Project**: {result.project_path}",
            f"- **Analysis Mode**: {result.analysis_mode}",
            f"- **Total Functions**: {len(result.functions)}",
            f"- **Total Classes**: {len(result.classes)}",
            f"- **Modules**: {len(result.modules)}",
            f"- **Entry Points**: {len(result.entry_points)}",
            "",
        ]

    def _get_architecture_by_module(self, result: AnalysisResult) -> List[str]:
        """List non-empty modules, sorted by function count, top 20 only."""
        lines = ["## Architecture by Module", ""]
        module_stats = []
        for mod_name, mod in result.modules.items():
            # Skip modules with neither functions nor classes.
            if len(mod.functions) > 0 or len(mod.classes) > 0:
                module_stats.append((mod_name, len(mod.functions), len(mod.classes), mod.file))

        # Sort by function count (index 1), busiest modules first.
        module_stats.sort(key=lambda x: x[1], reverse=True)
        for mod_name, f_count, c_count, file_path in module_stats[:20]:
            lines.append(f"### {mod_name}")
            lines.append(f"- **Functions**: {f_count}")
            if c_count > 0: lines.append(f"- **Classes**: {c_count}")
            if file_path: lines.append(f"- **File**: `{Path(file_path).name}`")
            lines.append("")
        return lines

    def _get_important_entries(self, result: AnalysisResult) -> List[Tuple[str, int, Any]]:
        """Rank entry points by connectivity.

        Score = outgoing calls + incoming callers; returns (name, score,
        FunctionInfo) tuples, highest score first.  Entry points with no
        matching entry in ``result.functions`` are dropped.
        """
        entries = []
        for ep in result.entry_points:
            func = result.functions.get(ep)
            if func:
                score = len(func.calls) + len(func.called_by)
                entries.append((ep, score, func))
        entries.sort(key=lambda x: x[1], reverse=True)
        return entries

    def _get_key_entry_points(self, important_entries: List[Tuple[str, int, Any]]) -> List[str]:
        """Render the top 30 entry points with docstring excerpt and callees."""
        lines = ["## Key Entry Points", "", "Main execution flows into the system:", ""]
        for ep, _, func in important_entries[:30]:
            lines.append(f"### {ep}")
            if func.docstring: lines.append(f"> {func.docstring[:150]}")
            if func.calls: lines.append(f"- **Calls**: {', '.join(func.calls[:8])}")
            lines.append("")
        return lines

    def _get_process_flows(self, result: AnalysisResult, important_entries: List[Tuple[str, int, Any]]) -> List[str]:
        """Render up to 10 traced call flows for the top 20 entry points.

        Deduplicates both by unqualified entry-point name and by the
        rendered flow text, so near-identical flows appear only once.
        """
        lines = ["## Process Flows", "", "Key execution flows identified:", ""]
        flow_id, seen_flows, seen_base_names = 1, set(), set()
        for ep_name, _, ep_func in important_entries[:20]:
            base_name = ep_name.split('.')[-1]
            if base_name in seen_base_names: continue
            seen_base_names.add(base_name)
            flow = self._trace_flow(ep_name, ep_func, result, depth=3)
            if flow and flow not in seen_flows:
                seen_flows.add(flow)
                lines.extend([f"### Flow {flow_id}: {base_name}", "```", flow, "```", ""])
                flow_id += 1
                if flow_id > 10: break
        return lines

    def _get_key_classes(self, result: AnalysisResult) -> List[str]:
        """Render the 20 classes with the most methods; empty if no classes."""
        if not result.classes: return []
        lines = ["## Key Classes", ""]
        class_list = sorted(result.classes.items(), key=lambda x: len(x[1].methods), reverse=True)
        for cls_name, cls in class_list[:20]:
            lines.append(f"### {cls_name}")
            if cls.docstring: lines.append(f"> {cls.docstring[:100]}")
            lines.append(f"- **Methods**: {len(cls.methods)}")
            if cls.methods: lines.append(f"- **Key Methods**: {', '.join(cls.methods[:10])}")
            if cls.bases: lines.append(f"- **Inherits**: {', '.join(cls.bases)}")
            lines.append("")
        return lines

    def _get_data_transformations(self, result: AnalysisResult) -> List[str]:
        """Render up to 25 functions whose names suggest data processing.

        Selection is a pure name heuristic (substring match against
        ``data_indicators``); results keep dict iteration order — they
        are not ranked.
        """
        lines = ["## Data Transformation Functions", "", "Key functions that process and transform data:", ""]
        data_indicators = ['parse', 'transform', 'convert', 'process', 'validate', 'serialize', 'deserialize', 'encode', 'decode', 'format']
        data_funcs = [
            (name, f) for name, f in result.functions.items()
            if any(ind in f.name.lower() for ind in data_indicators)
        ]
        for func_name, func in data_funcs[:25]:
            lines.append(f"### {func_name}")
            if func.docstring: lines.append(f"> {func.docstring[:100]}")
            if func.calls: lines.append(f"- **Output to**: {', '.join(func.calls[:5])}")
            lines.append("")
        return lines

    def _get_behavioral_patterns(self, result: AnalysisResult) -> List[str]:
        """Render the first 15 detected patterns; empty if none detected."""
        if not result.patterns: return []
        lines = ["## Behavioral Patterns", ""]
        for pattern in result.patterns[:15]:
            lines.append(f"### {pattern.name}")
            lines.append(f"- **Type**: {pattern.type}")
            lines.append(f"- **Confidence**: {pattern.confidence:.2f}")
            if pattern.functions: lines.append(f"- **Functions**: {', '.join(pattern.functions[:5])}")
            lines.append("")
        return lines

    def _get_api_surface(self, result: AnalysisResult) -> List[str]:
        """Render up to 40 public functions, most outgoing calls first.

        "Public" = short name has no leading underscore AND the qualified
        name is dotted (i.e. the function lives inside a module/class
        namespace, not at the bare root).
        """
        lines = ["## Public API Surface", "", "Functions exposed as public API (no underscore prefix):", ""]
        public_funcs = sorted(
            [(n, f) for n, f in result.functions.items() if not f.name.startswith('_') and '.' in n],
            key=lambda x: len(x[1].calls), reverse=True
        )
        for func_name, func in public_funcs[:40]:
            lines.append(f"- `{func_name}` - {len(func.calls)} calls")
        lines.append("")
        return lines

    def _get_system_interactions(self, important_entries: List[Tuple[str, int, Any]]) -> List[str]:
        """Render a Mermaid graph of entry-point -> callee edges.

        At most 15 entry points x 5 callees each, deduplicated, capped at
        30 edges; node labels are unqualified names truncated to 20 chars.
        """
        lines = ["## System Interactions", "", "How components interact:", "", "```mermaid", "graph TD"]
        added_edges = set()
        for ep_name, _, ep_func in important_entries[:15]:
            for called in ep_func.calls[:5]:
                edge = (ep_name.split('.')[-1][:20], called.split('.')[-1][:20])
                if edge not in added_edges and len(added_edges) < 30:
                    added_edges.add(edge)
                    lines.append(f"    {edge[0]} --> {edge[1]}")
        lines.extend(["```", ""])
        return lines

    def _trace_flow(self, func_name: str, func: FunctionInfo, result: AnalysisResult, depth: int, visited: set = None) -> str:
        """Trace execution flow from a function with cycle detection.

        Returns an ASCII tree rooted at *func_name*.  Recursion stops at
        ``depth`` 0 or on revisit; each branch recurses on a copy of
        ``visited`` so sibling branches may legitimately share callees.
        Callee names are assumed to be dotted qualified names, matching
        the keys of ``result.functions``.
        """
        if visited is None: visited = set()
        if func_name in visited or depth <= 0: return func_name.split('.')[-1]

        visited.add(func_name)
        short_name = func_name.split('.')[-1]
        module = func_name.rsplit('.', 1)[0] if '.' in func_name else 'root'
        lines = [f"{short_name} [{module}]"]

        # Group the first 5 callees by their module prefix.
        calls_by_module = {}
        for called in func.calls[:5]:
            mod = called.rsplit('.', 1)[0] if '.' in called else 'root'
            if mod not in calls_by_module: calls_by_module[mod] = []
            calls_by_module[mod].append(called)

        # Show at most 3 callees total, max 2 per module; same-module
        # callees are sorted first (False < True in the sort key).
        shown = 0
        for mod, calls in sorted(calls_by_module.items(), key=lambda x: x[0] != module):
            for called in calls[:2]:
                if shown >= 3: break
                called_func = result.functions.get(called)
                if called_func and called not in visited:
                    sub_flow = self._trace_flow(called, called_func, result, depth - 1, visited.copy())
                    # Cross-module hops get an arrow marker in the branch.
                    cross = " →" if mod != module else ""
                    lines.append(f"  └─{cross}> {called.split('.')[-1]}")
                    # Keep only the first 3 child lines of the sub-flow.
                    for sub in sub_flow.split('\n')[1:][:3]: lines.append("    " + sub)
                    shown += 1
        return '\n'.join(lines)
@@ -0,0 +1,67 @@
1
+ """Mermaid Exporter for code2flow."""
2
+
3
+ from pathlib import Path
4
+ from typing import Dict
5
+ from .base import Exporter
6
+ from ..core.models import AnalysisResult
7
+
8
+
9
class MermaidExporter(Exporter):
    """Export call graph to Mermaid format."""

    def export(self, result: AnalysisResult, output_path: str) -> None:
        """Export call graph as Mermaid flowchart."""
        self.export_call_graph(result, output_path)

    def export_call_graph(self, result: AnalysisResult, output_path: str) -> None:
        """Write a simplified Mermaid call graph to *output_path*.

        Nodes are grouped into subgraphs by top-level module segment
        (first 50 functions per module); edges are capped at 500 total
        and at 10 outgoing calls per function.
        """
        out = ["flowchart TD"]

        # Bucket qualified function names by their leading module segment.
        modules: Dict[str, list] = {}
        for qual_name in result.functions:
            parts = qual_name.split('.')
            top = parts[0] if parts else 'unknown'
            modules.setdefault(top, []).append(qual_name)

        # One subgraph per module, deterministically ordered by name.
        for module, members in sorted(modules.items()):
            safe_module = module.replace('-', '_').replace('.', '_')
            out.append(f"  subgraph {safe_module}")
            for qual_name in members[:50]:
                node_id = self._safe_id(qual_name)
                label = qual_name.split('.')[-1][:30]
                out.append(f'    {node_id}["{label}"]')
            out.append("  end")

        # Emit call edges until the global budget is spent.
        budget = 500
        for qual_name, func in result.functions.items():
            if budget <= 0:
                break
            source_id = self._safe_id(qual_name)
            for callee in func.calls[:10]:
                # Only link calls that resolve to an analyzed function.
                if callee in result.functions:
                    out.append(f"  {source_id} --> {self._safe_id(callee)}")
                    budget -= 1
                    if budget <= 0:
                        break

        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text('\n'.join(out), encoding='utf-8')

    def export_compact(self, result: AnalysisResult, output_path: str) -> None:
        """Export compact flowchart - same as call graph for now."""
        self.export_call_graph(result, output_path)

    def _safe_id(self, name: str) -> str:
        """Create Mermaid-safe node ID.

        Dots, dashes and colons become underscores; over-long IDs are
        truncated and suffixed with a small hash to keep them distinct.
        """
        safe = name.replace('.', '_').replace('-', '_').replace(':', '_')
        if len(safe) > 40:
            return safe[:20] + '_' + str(hash(name) % 10000)
        return safe