code2llm 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2flow/__init__.py +47 -0
- code2flow/__main__.py +6 -0
- code2flow/analysis/__init__.py +23 -0
- code2flow/analysis/call_graph.py +210 -0
- code2flow/analysis/cfg.py +293 -0
- code2flow/analysis/coupling.py +77 -0
- code2flow/analysis/data_analysis.py +249 -0
- code2flow/analysis/dfg.py +224 -0
- code2flow/analysis/pipeline_detector.py +445 -0
- code2flow/analysis/side_effects.py +313 -0
- code2flow/analysis/smells.py +192 -0
- code2flow/analysis/type_inference.py +306 -0
- code2flow/cli.py +493 -0
- code2flow/core/__init__.py +36 -0
- code2flow/core/analyzer.py +765 -0
- code2flow/core/config.py +177 -0
- code2flow/core/models.py +194 -0
- code2flow/core/streaming_analyzer.py +666 -0
- code2flow/exporters/__init__.py +35 -0
- code2flow/exporters/base.py +13 -0
- code2flow/exporters/context_exporter.py +207 -0
- code2flow/exporters/flow_exporter.py +570 -0
- code2flow/exporters/json_exporter.py +17 -0
- code2flow/exporters/llm_exporter.py +12 -0
- code2flow/exporters/map_exporter.py +218 -0
- code2flow/exporters/mermaid_exporter.py +67 -0
- code2flow/exporters/toon.py +982 -0
- code2flow/exporters/yaml_exporter.py +108 -0
- code2flow/llm_flow_generator.py +451 -0
- code2flow/llm_task_generator.py +263 -0
- code2flow/mermaid_generator.py +481 -0
- code2flow/nlp/__init__.py +23 -0
- code2flow/nlp/config.py +174 -0
- code2flow/nlp/entity_resolution.py +326 -0
- code2flow/nlp/intent_matching.py +297 -0
- code2flow/nlp/normalization.py +122 -0
- code2flow/nlp/pipeline.py +388 -0
- code2flow/patterns/__init__.py +0 -0
- code2flow/patterns/detector.py +168 -0
- code2flow/refactor/__init__.py +0 -0
- code2flow/refactor/prompt_engine.py +150 -0
- code2flow/visualizers/__init__.py +0 -0
- code2flow/visualizers/graph.py +196 -0
- code2llm-0.3.7.dist-info/METADATA +604 -0
- code2llm-0.3.7.dist-info/RECORD +49 -0
- code2llm-0.3.7.dist-info/WHEEL +5 -0
- code2llm-0.3.7.dist-info/entry_points.txt +2 -0
- code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
- code2llm-0.3.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Engine for generating refactoring prompts using Jinja2 templates."""
|
|
2
|
+
import os
|
|
3
|
+
import jinja2
|
|
4
|
+
import tiktoken
|
|
5
|
+
from tree_sitter import Language, Parser
|
|
6
|
+
import tree_sitter_python
|
|
7
|
+
from typing import List, Dict, Any, Optional
|
|
8
|
+
from ..core.models import AnalysisResult, CodeSmell
|
|
9
|
+
|
|
10
|
+
class PromptEngine:
    """Generate refactoring prompts from analysis results and detected smells.

    One prompt is rendered per entry of ``result.smells`` using a Jinja2
    template selected by the smell type. Prompts are optionally truncated
    with tiktoken so they stay below ``MAX_PROMPT_TOKENS``.
    """

    # A prompt longer than MAX_PROMPT_TOKENS is cut down to
    # TRUNCATED_PROMPT_TOKENS; the gap leaves room for the truncation marker.
    MAX_PROMPT_TOKENS = 4000
    TRUNCATED_PROMPT_TOKENS = 3800

    def __init__(self, result: "AnalysisResult", template_dir: Optional[str] = None):
        """Set up the template environment and the optional helpers.

        Args:
            result: Analysis result whose smells, metrics, mutations and
                functions are used to build the prompts.
            template_dir: Directory holding the Jinja2 templates. Defaults to
                the ``templates`` directory one level above this file's
                directory.
        """
        if template_dir is None:
            # Default to templates directory relative to this file
            template_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'templates')

        self.result = result
        self.env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))

        # Initialize tiktoken for context management; truncation is skipped
        # entirely when the encoding cannot be loaded.
        try:
            self.encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4/3.5-turbo encoding
        except Exception:
            self.encoding = None

        # Initialize tree-sitter. Both attributes always exist afterwards,
        # even when the initialisation fails part-way.
        self.PY_LANGUAGE = None
        self.parser = None
        try:
            self.PY_LANGUAGE = Language(tree_sitter_python.language())
            self.parser = Parser(self.PY_LANGUAGE)
        except Exception:
            self.parser = None

    def generate_prompts(self) -> Dict[str, str]:
        """Generate a prompt for each detected code smell.

        Returns:
            Mapping of generated file name (``NN_<type>_<name>.md``) to the
            rendered prompt text. Smells for which no prompt could be
            rendered are left out.
        """
        prompts: Dict[str, str] = {}

        for index, smell in enumerate(self.result.smells, start=1):
            prompt = self._generate_prompt_for_smell(smell)
            if not prompt:
                continue

            # Use a unique name for each prompt
            slug = smell.name.lower().replace(' ', '_').replace(':', '')
            prompts[f"{index:02d}_{smell.type}_{slug}.md"] = self._truncate_prompt(prompt)

        return prompts

    def _truncate_prompt(self, prompt: str) -> str:
        """Cut *prompt* down when it exceeds ``MAX_PROMPT_TOKENS`` tokens.

        Returns the prompt unchanged when no encoding is available or the
        prompt is short enough.
        """
        if not self.encoding:
            return prompt

        # Prompts embed arbitrary source code, which may literally contain
        # special-token text such as "<|endoftext|>"; encode it as plain text
        # instead of letting tiktoken raise ValueError.
        tokens = self.encoding.encode(prompt, disallowed_special=())
        if len(tokens) <= self.MAX_PROMPT_TOKENS:
            return prompt

        head = self.encoding.decode(tokens[:self.TRUNCATED_PROMPT_TOKENS])
        return head + "\n\n... (prompt truncated due to length) ..."

    def _generate_prompt_for_smell(self, smell: "CodeSmell") -> Optional[str]:
        """Generate a single prompt from a CodeSmell.

        Returns:
            The rendered prompt, or ``None`` when the smell type has no
            template or rendering failed (the error is printed).
        """
        template_name = self._get_template_for_type(smell.type)
        if not template_name:
            return None

        try:
            template = self.env.get_template(template_name)
            context = self._build_context_for_smell(smell)
            return template.render(**context)
        except Exception as e:
            # Best effort: one broken smell must not abort the whole batch.
            print(f"Error generating prompt for {smell.name}: {e}")
            return None

    def _get_template_for_type(self, smell_type: str) -> Optional[str]:
        """Map smell type to Jinja2 template filename (``None`` if unknown)."""
        mapping = {
            "god_function": "extract_method.md",
            "feature_envy": "move_method.md",
            "data_clump": "move_method.md",
            "shotgun_surgery": "extract_method.md",
            "bottleneck": "extract_method.md",
            "circular_dependency": "move_method.md",
        }
        return mapping.get(smell_type)

    def _build_context_for_smell(self, smell: "CodeSmell") -> Dict[str, Any]:
        """Prepare context data for the Jinja2 template.

        Args:
            smell: The smell to describe. Its ``name`` is expected to end
                with the target function name after the last ``': '``.

        Returns:
            Dictionary of template variables (target function, metrics,
            mutation summaries, source excerpt, instruction, ...).
        """
        # Heuristic to find function name: the part after the last ': '.
        target = smell.name.split(': ')[-1]
        smell_context = smell.context or {}
        function_name = smell_context.get('function')
        # A missing, None or empty list must all behave the same way.
        foreign = list(smell_context.get('foreign_mutations') or [])

        # Extract source code for context
        source_code = self._get_source_context(smell.file, smell.line)

        # Prepare metrics
        metrics = self.result.metrics.get(target, {})
        if not metrics and 'function' in smell_context:
            metrics = self.result.metrics.get(function_name, {})

        # Prepare mutations
        mutations = [m for m in self.result.mutations if m.scope in (target, function_name)]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # rendered prompt is identical from run to run (a set is not ordered).
        recent_variables = dict.fromkeys(m.variable for m in mutations[:5])
        mutations_summary = f"{len(mutations)} modifications recorded: {', '.join(recent_variables)}..."
        foreign_owners = dict.fromkeys(v.split('.')[0] for v in foreign if '.' in v)
        dependencies = dict.fromkeys(m.variable for m in mutations if '.' in m.variable)

        # Module name = file name without directories and without a trailing
        # '.py' (only the suffix is removed, never a '.py' inside the name).
        file_name = (smell.file or '').replace('\\', '/').rsplit('/', 1)[-1]
        source_module = file_name[:-3] if file_name.endswith('.py') else file_name

        if smell.type == "feature_envy" and foreign:
            target_module = foreign[0].split('.')[0]
        else:
            target_module = "other_module"

        return {
            "target_function": target,
            "reason": smell.description,
            "metrics": metrics,
            "mutations_context": mutations_summary,
            "source_file": smell.file,
            "start_line": smell.line,
            "end_line": smell.line + 20,  # Heuristic: end of function or next 20 lines
            "source_code": source_code,
            "instruction": self._get_instruction_for_smell(smell),
            # move_method specific
            "source_module": source_module,
            "target_module": target_module,
            "foreign_mutations": ", ".join(foreign),
            "foreign_mutations_context": f"This code mutates state in {', '.join(foreign_owners)}",
            "dependencies": ", ".join(dependencies),
            "reachability": getattr(self.result.functions.get(target), 'reachability', "unknown"),
        }

    def _get_source_context(self, file_path: str, start_line: int, max_lines: int = 50) -> str:
        """Read source code lines from a file.

        Args:
            file_path: Path of the file to read.
            start_line: 1-based line number of the first line to return.
            max_lines: Maximum number of lines to return.

        Returns:
            Up to ``max_lines`` lines starting at ``start_line`` joined with
            newlines, or a ``#``-prefixed placeholder when the file is
            missing or cannot be read.
        """
        if not file_path or not os.path.exists(file_path):
            return "# Source file not found."

        try:
            # Decode explicitly as UTF-8 instead of the locale default;
            # undecodable bytes are replaced rather than failing the read.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.read().splitlines()
        except OSError as e:
            return f"# Error reading source: {e}"

        # The excerpt is a plain line window. No tree-sitter parse is done
        # here because the parse result was never used to pick the lines.
        start = max(0, start_line - 1)
        end = min(len(lines), start + max_lines)
        return "\n".join(lines[start:end])

    def _get_instruction_for_smell(self, smell: "CodeSmell") -> str:
        """Generate specific instruction based on smell type.

        The instruction texts are emitted in Polish, as in the templates'
        existing output.
        """
        target = smell.name.split(': ')[-1]

        if smell.type == "god_function":
            return f"Wyekstrahuj mniejsze, spójne metody z funkcji {target}. Skup się na wydzieleniu operacji o największej liczbie mutacji."
        if smell.type == "feature_envy":
            return f"Przenieś metodę {target} do modułu, który posiada większość używanych w niej danych. Zmniejsz coupling między modułami."
        if smell.type == "bottleneck":
            return f"Funkcja {target} jest wąskim gardłem strukturalnym. Wyekstrahuj z niej niezależne części pomocnicze, aby ułatwić zrozumienie przepływu."
        if smell.type == "circular_dependency":
            return "Wykryto cykl zależności. Przenieś część logiki do nowego modułu lub użyj interfejsu, aby przerwać cykl."
        return "Zrefaktoryzuj ten fragment kodu, aby poprawić jego strukturę i zmniejszyć złożoność."
|
|
File without changes
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Graph visualization using NetworkX and matplotlib."""
|
|
2
|
+
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import matplotlib.patches as patches
|
|
5
|
+
import networkx as nx
|
|
6
|
+
from typing import Dict
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from ..core.models import AnalysisResult
|
|
10
|
+
from ..core.config import NODE_COLORS
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GraphVisualizer:
    """Visualize analysis results as graphs.

    A ``networkx.DiGraph`` is built from ``result.nodes`` and
    ``result.edges`` on construction; the ``visualize_*`` methods render
    images with matplotlib and save them to a file.
    """

    # Fallback colour for node types that have no NODE_COLORS entry.
    DEFAULT_NODE_COLOR = '#757575'

    def __init__(self, result: "AnalysisResult"):
        """Store *result* and build the control-flow graph from it."""
        self.result = result
        self.graph = nx.DiGraph()
        self._build_graph()

    def _build_graph(self):
        """Build NetworkX graph from analysis result."""
        # Add nodes
        for node_id, node in self.result.nodes.items():
            self.graph.add_node(
                node_id,
                # Labels may be missing; treat None like an empty label.
                label=(node.label or '')[:30],
                type=node.type,
                color=NODE_COLORS.get(node.type, self.DEFAULT_NODE_COLOR),
                function=node.function,
            )

        # Add edges (endpoints unknown to result.nodes are created
        # implicitly by networkx, without any attributes).
        for edge in self.result.edges:
            self.graph.add_edge(
                edge.source,
                edge.target,
                edge_type=edge.edge_type,
                conditions=edge.conditions,
            )

    def visualize_cfg(self, filepath: str, layout: str = 'spring'):
        """Create control flow visualization.

        Args:
            filepath: Destination image path passed to ``plt.savefig``.
            layout: ``'spring'``, ``'hierarchical'`` or ``'kamada'``; any
                other value selects the shell layout.
        """
        fig = plt.figure(figsize=(16, 12))
        try:
            # Choose layout
            if layout == 'spring':
                pos = nx.spring_layout(self.graph, k=2, iterations=50, seed=42)
            elif layout == 'hierarchical':
                pos = self._hierarchical_layout()
            elif layout == 'kamada':
                pos = nx.kamada_kawai_layout(self.graph)
            else:
                pos = nx.shell_layout(self.graph)

            # Get node colors
            node_colors = [
                NODE_COLORS.get(
                    self.graph.nodes[node_id].get('type', 'DEFAULT'),
                    self.DEFAULT_NODE_COLOR,
                )
                for node_id in self.graph.nodes()
            ]

            # Draw graph
            nx.draw_networkx_nodes(
                self.graph, pos,
                node_color=node_colors,
                node_size=600,
                alpha=0.8,
                edgecolors='white',
                linewidths=2,
            )

            nx.draw_networkx_edges(
                self.graph, pos,
                alpha=0.4,
                arrows=True,
                arrowsize=15,
                arrowstyle='->',
                edge_color='#666666',
                width=1.5,
            )

            # Draw labels for important nodes
            labels = {
                node_id: (data.get('label', '') or '')[:25]
                for node_id, data in self.graph.nodes(data=True)
                if data.get('type') in ('FUNC', 'IF', 'CALL')
            }

            nx.draw_networkx_labels(
                self.graph, pos, labels,
                font_size=8,
                font_color='white',
                font_weight='bold',
            )

            # Add legend; a missing colour entry falls back to the default
            # colour instead of raising KeyError.
            legend_entries = (
                ('FUNC', 'Function'),
                ('CALL', 'Call'),
                ('IF', 'Decision'),
                ('FOR', 'Loop'),
                ('RETURN', 'Return'),
            )
            legend_elements = [
                patches.Patch(color=NODE_COLORS.get(key, self.DEFAULT_NODE_COLOR), label=text)
                for key, text in legend_entries
            ]
            plt.legend(handles=legend_elements, loc='upper right', fontsize=10)

            plt.title('Control Flow Graph', fontsize=16, fontweight='bold')
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
        finally:
            # Release the figure even when layout, drawing or saving fails.
            plt.close(fig)

    def visualize_call_graph(self, filepath: str):
        """Visualize call graph.

        Nothing is drawn or written when ``result.functions`` is empty.

        Args:
            filepath: Destination image path passed to ``plt.savefig``.
        """
        # Build call graph
        call_graph = nx.DiGraph()

        for func_name, func_info in self.result.functions.items():
            call_graph.add_node(func_name, label=func_name.split('.')[-1])

            for callee in func_info.calls:
                call_graph.add_edge(func_name, callee)

        if call_graph.number_of_nodes() == 0:
            return  # Nothing to visualize

        fig = plt.figure(figsize=(14, 10))
        try:
            # Layout
            pos = nx.spring_layout(call_graph, k=1.5, iterations=50, seed=42)

            # Node sizes based on calls
            node_sizes = [
                300 + (call_graph.out_degree(node) + call_graph.in_degree(node)) * 150
                for node in call_graph.nodes()
            ]

            # Draw
            nx.draw_networkx_nodes(
                call_graph, pos,
                node_color='#4CAF50',
                node_size=node_sizes,
                alpha=0.8,
                edgecolors='white',
                linewidths=2,
            )

            nx.draw_networkx_edges(
                call_graph, pos,
                alpha=0.5,
                arrows=True,
                arrowsize=20,
                edge_color='#2196F3',
                width=2,
            )

            # Labels. Callees that are not analysed functions have no
            # 'label' attribute; show their short name as well, so the
            # 20-character cut does not drop the function name itself.
            labels = {}
            for node in call_graph.nodes():
                text = call_graph.nodes[node].get('label') or str(node).split('.')[-1] or str(node)
                labels[node] = text[:20]

            nx.draw_networkx_labels(
                call_graph, pos, labels,
                font_size=9,
                font_weight='bold',
            )

            plt.title('Function Call Graph', fontsize=16, fontweight='bold')
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
        finally:
            # Release the figure even when layout, drawing or saving fails.
            plt.close(fig)

    def _hierarchical_layout(self) -> Dict:
        """Create hierarchical layout grouped by function.

        Returns:
            Mapping of node id to ``(x, y)``. Each function gets its own
            row (rows sorted by function name, 3 units apart) and its nodes
            are spaced 2 units apart along x.
        """
        from collections import defaultdict

        # Group nodes by function
        function_groups = defaultdict(list)
        for node_id, node in self.result.nodes.items():
            function_groups[node.function or '__global__'].append(node_id)

        # Nodes that exist only because an edge referenced them are not in
        # result.nodes; they still need a position or drawing fails.
        known = set(self.result.nodes)
        for node_id in self.graph.nodes:
            if node_id not in known:
                function_groups['__global__'].append(node_id)

        # Position nodes
        pos = {}
        y_offset = 0

        for _func_name, node_ids in sorted(function_groups.items()):
            for column, node_id in enumerate(node_ids):
                pos[node_id] = (column * 2, -y_offset)
            y_offset += 3

        return pos
|