code2llm 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +23 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/pipeline_detector.py +445 -0
  10. code2flow/analysis/side_effects.py +313 -0
  11. code2flow/analysis/smells.py +192 -0
  12. code2flow/analysis/type_inference.py +306 -0
  13. code2flow/cli.py +493 -0
  14. code2flow/core/__init__.py +36 -0
  15. code2flow/core/analyzer.py +765 -0
  16. code2flow/core/config.py +177 -0
  17. code2flow/core/models.py +194 -0
  18. code2flow/core/streaming_analyzer.py +666 -0
  19. code2flow/exporters/__init__.py +35 -0
  20. code2flow/exporters/base.py +13 -0
  21. code2flow/exporters/context_exporter.py +207 -0
  22. code2flow/exporters/flow_exporter.py +570 -0
  23. code2flow/exporters/json_exporter.py +17 -0
  24. code2flow/exporters/llm_exporter.py +12 -0
  25. code2flow/exporters/map_exporter.py +218 -0
  26. code2flow/exporters/mermaid_exporter.py +67 -0
  27. code2flow/exporters/toon.py +982 -0
  28. code2flow/exporters/yaml_exporter.py +108 -0
  29. code2flow/llm_flow_generator.py +451 -0
  30. code2flow/llm_task_generator.py +263 -0
  31. code2flow/mermaid_generator.py +481 -0
  32. code2flow/nlp/__init__.py +23 -0
  33. code2flow/nlp/config.py +174 -0
  34. code2flow/nlp/entity_resolution.py +326 -0
  35. code2flow/nlp/intent_matching.py +297 -0
  36. code2flow/nlp/normalization.py +122 -0
  37. code2flow/nlp/pipeline.py +388 -0
  38. code2flow/patterns/__init__.py +0 -0
  39. code2flow/patterns/detector.py +168 -0
  40. code2flow/refactor/__init__.py +0 -0
  41. code2flow/refactor/prompt_engine.py +150 -0
  42. code2flow/visualizers/__init__.py +0 -0
  43. code2flow/visualizers/graph.py +196 -0
  44. code2llm-0.3.7.dist-info/METADATA +604 -0
  45. code2llm-0.3.7.dist-info/RECORD +49 -0
  46. code2llm-0.3.7.dist-info/WHEEL +5 -0
  47. code2llm-0.3.7.dist-info/entry_points.txt +2 -0
  48. code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
  49. code2llm-0.3.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,150 @@
1
+ """Engine for generating refactoring prompts using Jinja2 templates."""
2
+ import os
3
+ import jinja2
4
+ import tiktoken
5
+ from tree_sitter import Language, Parser
6
+ import tree_sitter_python
7
+ from typing import List, Dict, Any, Optional
8
+ from ..core.models import AnalysisResult, CodeSmell
9
+
10
class PromptEngine:
    """Generate refactoring prompts from analysis results and detected smells.

    One prompt is rendered per smell in ``result.smells`` by filling a Jinja2
    template with data collected from the analysis result (metrics, recorded
    mutations, function info) and an excerpt of the offending source file.
    """

    # Smell type -> Jinja2 template filename. Smell types that are not listed
    # here produce no prompt.
    _TEMPLATE_BY_SMELL_TYPE = {
        "god_function": "extract_method.md",
        "feature_envy": "move_method.md",
        "data_clump": "move_method.md",
        "shotgun_surgery": "extract_method.md",
        "bottleneck": "extract_method.md",
        "circular_dependency": "move_method.md",
    }

    # Tokens held back below the limit when a prompt has to be cut, so the
    # truncation notice still fits (4000 -> 3800 with the default limit).
    _TRUNCATION_MARGIN = 200
    _TRUNCATION_NOTICE = "\n\n... (prompt truncated due to length) ..."

    def __init__(self, result: AnalysisResult, template_dir: Optional[str] = None):
        """Set up the template environment and the optional helper libraries.

        Args:
            result: Analysis result whose ``smells``, ``metrics``,
                ``mutations`` and ``functions`` are read when building prompts.
            template_dir: Directory holding the Jinja2 templates. Defaults to
                the ``templates`` directory one level above this file's
                directory.
        """
        if template_dir is None:
            # Default to templates directory relative to this file
            template_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'templates')

        self.result = result
        self.env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))

        # Initialize tiktoken for context management. Best effort: without an
        # encoding, prompts are simply never truncated.
        try:
            self.encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4/3.5-turbo encoding
        except Exception:
            self.encoding = None

        # Initialize tree-sitter for precision extraction. Best effort as
        # well; both attributes always exist, even when initialisation fails.
        self.PY_LANGUAGE = None
        try:
            self.PY_LANGUAGE = Language(tree_sitter_python.language())
            self.parser = Parser(self.PY_LANGUAGE)
        except Exception:
            self.parser = None

    def generate_prompts(self, max_tokens: int = 4000) -> Dict[str, str]:
        """Generate a prompt for each detected code smell.

        Args:
            max_tokens: Token budget per prompt. Longer prompts are cut and a
                truncation notice is appended. Only enforced when a tiktoken
                encoding is available.

        Returns:
            Mapping of generated filename to prompt text. Smells for which no
            prompt could be produced are left out, but still consume their
            sequence number.
        """
        prompts = {}
        for index, smell in enumerate(self.result.smells, start=1):
            prompt = self._generate_prompt_for_smell(smell)
            if not prompt:
                continue
            prompt = self._truncate_to_token_limit(prompt, max_tokens)

            # Use a unique name for each prompt
            slug = smell.name.lower().replace(' ', '_').replace(':', '')
            prompts[f"{index:02d}_{smell.type}_{slug}.md"] = prompt
        return prompts

    def _truncate_to_token_limit(self, prompt: str, max_tokens: int) -> str:
        """Cut *prompt* down when it encodes to more than *max_tokens* tokens."""
        if not self.encoding:
            return prompt
        tokens = self.encoding.encode(prompt)
        if len(tokens) <= max_tokens:
            return prompt
        keep = max(0, max_tokens - self._TRUNCATION_MARGIN)
        return self.encoding.decode(tokens[:keep]) + self._TRUNCATION_NOTICE

    def _generate_prompt_for_smell(self, smell: CodeSmell) -> Optional[str]:
        """Generate a single prompt from a CodeSmell.

        Returns:
            The rendered prompt, or ``None`` when the smell type has no
            template or rendering failed (the failure is printed).
        """
        template_name = self._get_template_for_type(smell.type)
        if not template_name:
            return None

        # Deliberately broad: one broken smell or template must not abort the
        # generation of the remaining prompts.
        try:
            template = self.env.get_template(template_name)
            context = self._build_context_for_smell(smell)
            return template.render(**context)
        except Exception as e:
            print(f"Error generating prompt for {smell.name}: {e}")
            return None

    def _get_template_for_type(self, smell_type: str) -> Optional[str]:
        """Map smell type to Jinja2 template filename (``None`` if unmapped)."""
        return self._TEMPLATE_BY_SMELL_TYPE.get(smell_type)

    @staticmethod
    def _target_name(smell: CodeSmell) -> str:
        """Return the part of the smell name after the last ``': '``.

        Heuristic used to recover the function name from the smell name.
        """
        return smell.name.split(': ')[-1]

    @staticmethod
    def _unique(items) -> List[str]:
        """Drop duplicates while keeping first-seen order (stable output)."""
        return list(dict.fromkeys(items))

    @staticmethod
    def _module_name(file_path: str) -> str:
        """Return the file's base name without a trailing ``.py`` suffix.

        Both ``/`` and ``\\`` are accepted as path separators.
        """
        base = file_path.replace('\\', '/').rsplit('/', 1)[-1]
        return base[:-3] if base.endswith('.py') else base

    def _build_context_for_smell(self, smell: CodeSmell) -> Dict[str, Any]:
        """Prepare context data for the Jinja2 template.

        Args:
            smell: The smell to describe. Its ``name``, ``type``,
                ``description``, ``file``, ``line`` and ``context`` are read.

        Returns:
            Dictionary of template variables shared by both templates.
        """
        target = self._target_name(smell)
        smell_context = smell.context or {}
        context_function = smell_context.get('function')

        # Extract source code for context
        source_code = self._get_source_context(smell.file, smell.line)

        # Prepare metrics: first by the name heuristic, then by the function
        # recorded in the smell context.
        metrics = self.result.metrics.get(target, {})
        if not metrics and 'function' in smell_context:
            metrics = self.result.metrics.get(context_function, {})

        # Prepare mutations. A missing context function must not turn into a
        # ``None`` scope that matches unrelated, scope-less mutations.
        scopes = tuple(s for s in (target, context_function) if s is not None)
        mutations = [m for m in self.result.mutations if m.scope in scopes]
        sample_variables = self._unique(m.variable for m in mutations[:5])
        mutations_summary = (
            f"{len(mutations)} modifications recorded: {', '.join(sample_variables)}..."
        )

        # move_method specific
        foreign_mutations = smell_context.get('foreign_mutations') or []
        if smell.type == "feature_envy" and foreign_mutations:
            target_module = foreign_mutations[0].split('.')[0]
        else:
            # Also covers an empty 'foreign_mutations' list.
            target_module = "other_module"
        foreign_owners = self._unique(
            v.split('.')[0] for v in foreign_mutations if '.' in v
        )
        dependencies = self._unique(m.variable for m in mutations if '.' in m.variable)

        function_info = self.result.functions.get(target)

        return {
            "target_function": target,
            "reason": smell.description,
            "metrics": metrics,
            "mutations_context": mutations_summary,
            "source_file": smell.file,
            "start_line": smell.line,
            # Heuristic: end of function or next 20 lines.
            # NOTE(review): the excerpt in "source_code" spans up to 50 lines
            # by default, so this value may understate it - confirm how the
            # templates use it.
            "end_line": smell.line + 20,
            "source_code": source_code,
            "instruction": self._get_instruction_for_smell(smell),
            "source_module": self._module_name(smell.file),
            "target_module": target_module,
            "foreign_mutations": ", ".join(foreign_mutations),
            "foreign_mutations_context": f"This code mutates state in {', '.join(foreign_owners)}",
            "dependencies": ", ".join(dependencies),
            "reachability": getattr(function_info, 'reachability', "unknown"),
        }

    def _get_source_context(self, file_path: str, start_line: int, max_lines: int = 50) -> str:
        """Read source code lines from a file.

        Args:
            file_path: Path of the file to read.
            start_line: 1-based line number of the first line to return.
            max_lines: Maximum number of lines to return.

        Returns:
            The selected lines joined with newlines, or a ``#`` comment line
            describing why the source could not be read. Never raises.
        """
        if not file_path:
            return "# Source file not found."

        try:
            # Explicit encoding so the result does not depend on the platform
            # default; undecodable bytes are replaced instead of failing.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.read().splitlines()

            # TODO: use the tree-sitter parser to find the enclosing function
            # node at start_line instead of a fixed-size window.
            start = max(0, start_line - 1)
            return "\n".join(lines[start:start + max_lines])
        except FileNotFoundError:
            return "# Source file not found."
        except Exception as e:
            return f"# Error reading source: {e}"

    def _get_instruction_for_smell(self, smell: CodeSmell) -> str:
        """Generate specific instruction based on smell type.

        The instruction texts are in Polish; smell types without a dedicated
        text get a generic refactoring instruction.
        """
        target = self._target_name(smell)
        if smell.type == "god_function":
            return f"Wyekstrahuj mniejsze, spójne metody z funkcji {target}. Skup się na wydzieleniu operacji o największej liczbie mutacji."
        if smell.type == "feature_envy":
            return f"Przenieś metodę {target} do modułu, który posiada większość używanych w niej danych. Zmniejsz coupling między modułami."
        if smell.type == "bottleneck":
            return f"Funkcja {target} jest wąskim gardłem strukturalnym. Wyekstrahuj z niej niezależne części pomocnicze, aby ułatwić zrozumienie przepływu."
        if smell.type == "circular_dependency":
            return "Wykryto cykl zależności. Przenieś część logiki do nowego modułu lub użyj interfejsu, aby przerwać cykl."
        return "Zrefaktoryzuj ten fragment kodu, aby poprawić jego strukturę i zmniejszyć złożoność."
File without changes
@@ -0,0 +1,196 @@
1
+ """Graph visualization using NetworkX and matplotlib."""
2
+
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.patches as patches
5
+ import networkx as nx
6
+ from typing import Dict
7
+ from pathlib import Path
8
+
9
+ from ..core.models import AnalysisResult
10
+ from ..core.config import NODE_COLORS
11
+
12
+
13
class GraphVisualizer:
    """Visualize analysis results as graphs.

    A ``networkx.DiGraph`` is built once from ``result.nodes`` and
    ``result.edges``; the ``visualize_*`` methods render images to disk with
    matplotlib.
    """

    # Colour used for node types that have no entry in NODE_COLORS.
    _DEFAULT_NODE_COLOR = '#757575'
    # Only these node types get a text label in the control flow picture.
    _LABELLED_NODE_TYPES = ('FUNC', 'IF', 'CALL')
    # (node type, legend caption) pairs shown in the control flow legend.
    _LEGEND_ENTRIES = (
        ('FUNC', 'Function'),
        ('CALL', 'Call'),
        ('IF', 'Decision'),
        ('FOR', 'Loop'),
        ('RETURN', 'Return'),
    )

    def __init__(self, result: AnalysisResult):
        """Store *result* and build the graph from it."""
        self.result = result
        self.graph = nx.DiGraph()
        self._build_graph()

    def _build_graph(self):
        """Build NetworkX graph from analysis result."""
        # Add nodes
        for node_id, node in self.result.nodes.items():
            self.graph.add_node(
                node_id,
                # A missing label is treated as empty, matching the guard
                # applied when labels are drawn.
                label=(node.label or '')[:30],
                type=node.type,
                color=NODE_COLORS.get(node.type, self._DEFAULT_NODE_COLOR),
                function=node.function,
            )

        # Add edges. Endpoints that are not in result.nodes are created
        # implicitly by networkx, without any attributes.
        for edge in self.result.edges:
            self.graph.add_edge(
                edge.source,
                edge.target,
                edge_type=edge.edge_type,
                conditions=edge.conditions,
            )

    def _compute_layout(self, layout: str) -> Dict:
        """Return node positions for the named layout (shell layout otherwise)."""
        if layout == 'spring':
            return nx.spring_layout(self.graph, k=2, iterations=50, seed=42)
        if layout == 'hierarchical':
            return self._hierarchical_layout()
        if layout == 'kamada':
            return nx.kamada_kawai_layout(self.graph)
        return nx.shell_layout(self.graph)

    def visualize_cfg(self, filepath: str, layout: str = 'spring'):
        """Create control flow visualization.

        Args:
            filepath: Destination image path passed to ``plt.savefig``.
            layout: ``'spring'``, ``'hierarchical'`` or ``'kamada'``; any
                other value selects the shell layout.
        """
        fig = plt.figure(figsize=(16, 12))
        try:
            # Choose layout
            pos = self._compute_layout(layout)

            # Get node colors
            node_colors = [
                NODE_COLORS.get(attrs.get('type', 'DEFAULT'), self._DEFAULT_NODE_COLOR)
                for _, attrs in self.graph.nodes(data=True)
            ]

            # Draw graph
            nx.draw_networkx_nodes(
                self.graph, pos,
                node_color=node_colors,
                node_size=600,
                alpha=0.8,
                edgecolors='white',
                linewidths=2
            )

            nx.draw_networkx_edges(
                self.graph, pos,
                alpha=0.4,
                arrows=True,
                arrowsize=15,
                arrowstyle='->',
                edge_color='#666666',
                width=1.5
            )

            # Draw labels for important nodes
            labels = {
                node_id: (attrs.get('label', '') or '')[:25]
                for node_id, attrs in self.graph.nodes(data=True)
                if attrs.get('type') in self._LABELLED_NODE_TYPES
            }
            nx.draw_networkx_labels(
                self.graph, pos, labels,
                font_size=8,
                font_color='white',
                font_weight='bold'
            )

            # Add legend; a type missing from NODE_COLORS falls back to the
            # default colour, like every other colour lookup in this class.
            legend_elements = [
                patches.Patch(
                    color=NODE_COLORS.get(node_type, self._DEFAULT_NODE_COLOR),
                    label=caption,
                )
                for node_type, caption in self._LEGEND_ENTRIES
            ]
            plt.legend(handles=legend_elements, loc='upper right', fontsize=10)

            plt.title('Control Flow Graph', fontsize=16, fontweight='bold')
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

    def visualize_call_graph(self, filepath: str):
        """Visualize call graph.

        Builds a separate graph from ``result.functions`` (one node per
        function, one edge per entry in its ``calls``) and saves it to
        *filepath*. Nothing is written when there are no functions.
        """
        # Build call graph
        call_graph = nx.DiGraph()
        for func_name, func_info in self.result.functions.items():
            call_graph.add_node(func_name, label=func_name.split('.')[-1])
            for callee in func_info.calls:
                call_graph.add_edge(func_name, callee)

        if call_graph.number_of_nodes() == 0:
            return  # Nothing to visualize

        fig = plt.figure(figsize=(14, 10))
        try:
            # Layout
            pos = nx.spring_layout(call_graph, k=1.5, iterations=50, seed=42)

            # Node sizes based on calls (incoming plus outgoing)
            node_sizes = [
                300 + (call_graph.out_degree(node) + call_graph.in_degree(node)) * 150
                for node in call_graph.nodes()
            ]

            # Draw
            nx.draw_networkx_nodes(
                call_graph, pos,
                node_color='#4CAF50',
                node_size=node_sizes,
                alpha=0.8,
                edgecolors='white',
                linewidths=2
            )

            nx.draw_networkx_edges(
                call_graph, pos,
                alpha=0.5,
                arrows=True,
                arrowsize=20,
                edge_color='#2196F3',
                width=2
            )

            # Labels; callees that were only added through an edge have no
            # 'label' attribute and fall back to their node name.
            labels = {
                n: (call_graph.nodes[n].get('label') or n)[:20]
                for n in call_graph.nodes()
            }
            nx.draw_networkx_labels(
                call_graph, pos, labels,
                font_size=9,
                font_weight='bold'
            )

            plt.title('Function Call Graph', fontsize=16, fontweight='bold')
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

    def _hierarchical_layout(self) -> Dict:
        """Create hierarchical layout grouped by function.

        Every node of ``self.graph`` gets a position, including nodes that
        only exist as edge endpoints; nodes without a function are placed in
        the ``'__global__'`` group.

        Returns:
            Mapping of node id to ``(x, y)``. Groups are stacked top to
            bottom in sorted name order, 3 units apart; nodes of one group
            are spread along x, 2 units apart, in graph insertion order.
        """
        from collections import defaultdict

        # Group nodes by function
        function_groups = defaultdict(list)
        for node_id in self.graph.nodes:
            func = self.graph.nodes[node_id].get('function') or '__global__'
            function_groups[func].append(node_id)

        # Position nodes
        pos = {}
        for row, func_name in enumerate(sorted(function_groups)):
            for column, node_id in enumerate(function_groups[func_name]):
                pos[node_id] = (column * 2, -row * 3)

        return pos