code2llm 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +23 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/pipeline_detector.py +445 -0
  10. code2flow/analysis/side_effects.py +313 -0
  11. code2flow/analysis/smells.py +192 -0
  12. code2flow/analysis/type_inference.py +306 -0
  13. code2flow/cli.py +493 -0
  14. code2flow/core/__init__.py +36 -0
  15. code2flow/core/analyzer.py +765 -0
  16. code2flow/core/config.py +177 -0
  17. code2flow/core/models.py +194 -0
  18. code2flow/core/streaming_analyzer.py +666 -0
  19. code2flow/exporters/__init__.py +35 -0
  20. code2flow/exporters/base.py +13 -0
  21. code2flow/exporters/context_exporter.py +207 -0
  22. code2flow/exporters/flow_exporter.py +570 -0
  23. code2flow/exporters/json_exporter.py +17 -0
  24. code2flow/exporters/llm_exporter.py +12 -0
  25. code2flow/exporters/map_exporter.py +218 -0
  26. code2flow/exporters/mermaid_exporter.py +67 -0
  27. code2flow/exporters/toon.py +982 -0
  28. code2flow/exporters/yaml_exporter.py +108 -0
  29. code2flow/llm_flow_generator.py +451 -0
  30. code2flow/llm_task_generator.py +263 -0
  31. code2flow/mermaid_generator.py +481 -0
  32. code2flow/nlp/__init__.py +23 -0
  33. code2flow/nlp/config.py +174 -0
  34. code2flow/nlp/entity_resolution.py +326 -0
  35. code2flow/nlp/intent_matching.py +297 -0
  36. code2flow/nlp/normalization.py +122 -0
  37. code2flow/nlp/pipeline.py +388 -0
  38. code2flow/patterns/__init__.py +0 -0
  39. code2flow/patterns/detector.py +168 -0
  40. code2flow/refactor/__init__.py +0 -0
  41. code2flow/refactor/prompt_engine.py +150 -0
  42. code2flow/visualizers/__init__.py +0 -0
  43. code2flow/visualizers/graph.py +196 -0
  44. code2llm-0.3.7.dist-info/METADATA +604 -0
  45. code2llm-0.3.7.dist-info/RECORD +49 -0
  46. code2llm-0.3.7.dist-info/WHEEL +5 -0
  47. code2llm-0.3.7.dist-info/entry_points.txt +2 -0
  48. code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
  49. code2llm-0.3.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,313 @@
1
+ """Side-effect detector — AST-based side-effect classification.
2
+
3
+ Scans Python function bodies to detect:
4
+ - IO: open(), read(), write(), print(), file operations
5
+ - Cache: cache lookups/stores, memoization, lru_cache
6
+ - Mutation: self.x = ..., global, del, list.append/insert
7
+ - Pure: no detected side effects
8
+
9
+ Used by FlowExporter to enrich CONTRACTS and SIDE_EFFECTS sections.
10
+ """
11
+
12
+ import ast
13
+ import logging
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional, Set
16
+
17
+ from ..core.models import FunctionInfo
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
# Side-effect classification patterns
#
# These frozensets drive SideEffectDetector._check_calls.  Membership is
# tested against the *last* dotted component of the call name (e.g. the
# "write" in "fh.write"), except CACHE_INDICATORS, which is matched as a
# substring of the full dotted call name.

# Function/method names whose call is treated as input/output.  Includes
# generic verbs ("get", "execute"), so some false positives are accepted
# by design.
IO_CALLS = frozenset({
    "open", "read", "write", "print", "input",
    "mkdir", "makedirs", "rmdir", "remove", "unlink", "rename",
    "read_text", "write_text", "read_bytes", "write_bytes",
    "dump", "dumps", "load", "loads",
    "save", "savefig",
    "send", "recv", "connect", "listen", "accept",
    "get", "post", "put", "delete", "patch",  # HTTP
    "execute", "commit", "rollback",  # DB
})

# Method names typical of file-/socket-like objects; when one matches,
# the full dotted call name (not just the method) is recorded.
IO_ATTRIBUTES = frozenset({
    "write", "read", "readline", "readlines", "writelines",
    "flush", "close", "seek", "tell",
    "send", "recv", "sendall",
})

# Substrings that mark a call as cache-related (matched anywhere in the
# dotted call name, hence both lower- and CamelCase variants).
CACHE_INDICATORS = frozenset({
    "cache", "lru_cache", "memoize", "cached_property",
    "Cache", "FileCache",
})

# Exact cache-API call names.
CACHE_CALLS = frozenset({
    "cache_get", "cache_set", "cache_delete", "cache_clear",
    "get_cached", "set_cached",
})

# In-place container methods; only counted as mutations when called on a
# receiver (dotted call), see _check_calls.
MUTATION_CALLS = frozenset({
    "append", "extend", "insert", "pop", "remove", "clear",
    "update", "setdefault", "add", "discard",
    "sort", "reverse",
})
54
+
55
+
56
class SideEffectInfo:
    """Side-effect findings for a single analyzed function."""

    __slots__ = (
        "function_name", "qualified_name", "classification",
        "io_operations", "cache_operations", "mutations",
        "global_refs", "self_mutations", "has_yield",
    )

    def __init__(self, function_name: str, qualified_name: str):
        self.function_name = function_name
        self.qualified_name = qualified_name
        # One of: "pure" | "IO" | "cache" | "mutation".
        self.classification: str = "pure"
        self.io_operations: List[str] = []
        self.cache_operations: List[str] = []
        self.mutations: List[str] = []
        self.global_refs: List[str] = []
        self.self_mutations: List[str] = []
        self.has_yield: bool = False

    @property
    def is_pure(self) -> bool:
        """True when no side effect was detected."""
        return self.classification == "pure"

    @property
    def side_effect_summary(self) -> str:
        """One-line, human-readable summary of the detected effects."""
        pieces: List[str] = []
        io_ops = self.io_operations
        if io_ops:
            pieces.append("IO(%s)" % ", ".join(io_ops[:3]))
        cache_ops = self.cache_operations
        if cache_ops:
            pieces.append("cache(%s)" % ", ".join(cache_ops[:2]))
        attr_writes = self.self_mutations
        if attr_writes:
            # Each attribute is rendered as "self.<attr>".
            pieces.append("mutates " + ", ".join("self." + a for a in attr_writes[:3]))
        globs = self.global_refs
        if globs:
            pieces.append("global(%s)" % ", ".join(globs[:2]))
        if self.has_yield:
            pieces.append("generator")
        if not pieces:
            return "pure"
        return "; ".join(pieces)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the findings to a plain dict (e.g. for JSON export)."""
        return dict(
            function=self.function_name,
            qualified_name=self.qualified_name,
            classification=self.classification,
            io=self.io_operations,
            cache=self.cache_operations,
            mutations=self.mutations,
            globals=self.global_refs,
            self_mutations=self.self_mutations,
            has_yield=self.has_yield,
            summary=self.side_effect_summary,
        )
109
+
110
+
111
class SideEffectDetector:
    """Detect side effects in Python functions via AST analysis.

    For each function the detector parses the enclosing source file
    (cached per file), locates the function's AST node, and scans its
    body for IO calls, cache usage, container mutations, global/nonlocal
    references and writes to ``self`` attributes.  When the source file
    cannot be parsed, a name/call-based heuristic is used instead.

    Project types (FunctionInfo, SideEffectInfo) are referenced via
    string annotations so signatures never force their evaluation at
    class-definition time.
    """

    def __init__(self):
        # Per-file parse cache; a failed parse is stored as None so each
        # file is read at most once.
        self._ast_cache: Dict[str, Optional[ast.Module]] = {}

    def analyze_function(self, fi: "FunctionInfo") -> "SideEffectInfo":
        """Analyze a single function and return its side-effect profile."""
        info = SideEffectInfo(fi.name, fi.qualified_name)

        tree = self._get_ast(fi.file)
        if tree:
            node = self._find_function_node(tree, fi.name, fi.line)
            if node:
                self._scan_node(node, info)
                self._classify(info)
                return info

        # Fallback: heuristic from function name and recorded calls
        self._heuristic_classify(fi, info)
        return info

    def analyze_all(
        self, funcs: "Dict[str, FunctionInfo]"
    ) -> "Dict[str, SideEffectInfo]":
        """Batch-analyze all functions, keyed by qualified name."""
        results = {}
        for qname, fi in funcs.items():
            results[qname] = self.analyze_function(fi)
        return results

    def get_purity_score(self, fi: "FunctionInfo") -> str:
        """Return the purity classification: pure | IO | cache | mutation."""
        return self.analyze_function(fi).classification

    # ------------------------------------------------------------------
    # AST scanning
    # ------------------------------------------------------------------
    def _scan_node(self, func_node: ast.FunctionDef, info: "SideEffectInfo") -> None:
        """Walk the function body and record every side-effect pattern.

        NOTE: ast.walk also descends into nested defs/lambdas, so effects
        of inner functions are attributed to the enclosing function.
        """
        for node in ast.walk(func_node):
            self._check_calls(node, info)
            self._check_assignments(node, info)
            self._check_globals(node, info)
            self._check_yield(node, info)
            self._check_delete(node, info)

    def _check_calls(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect IO calls, cache calls and mutating method calls."""
        if not isinstance(node, ast.Call):
            return

        call_name = self._get_call_name(node.func)
        if not call_name:
            return

        parts = call_name.split(".")
        base_name = parts[-1]

        # IO detection: known IO call names first, then file-like methods.
        if base_name in IO_CALLS:
            info.io_operations.append(base_name)
        elif base_name in IO_ATTRIBUTES:
            info.io_operations.append(call_name)

        # Cache detection: exact API names, then substring indicators.
        if base_name in CACHE_CALLS:
            info.cache_operations.append(base_name)
        elif any(ci in call_name for ci in CACHE_INDICATORS):
            info.cache_operations.append(call_name)

        # Mutation via method calls (e.g. items.append); a receiver is
        # required so bare append()-style helpers are not misclassified.
        if base_name in MUTATION_CALLS and len(parts) >= 2:
            info.mutations.append(call_name)

    def _check_assignments(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect writes to ``self`` attributes (self.x = / += / : T = ...)."""
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AugAssign):
            targets = [node.target]
        elif isinstance(node, ast.AnnAssign):
            # A bare annotation (``self.x: int`` with no value) declares a
            # type without assigning anything, so it is not a mutation.
            if node.value is None:
                return
            targets = [node.target]
        else:
            return

        for target in targets:
            # Unpack tuple/list/starred patterns so targets like
            # ``self.a, self.b = ...`` are detected as well.
            for leaf in self._flatten_targets(target):
                if isinstance(leaf, ast.Attribute):
                    if isinstance(leaf.value, ast.Name) and leaf.value.id == "self":
                        info.self_mutations.append(leaf.attr)

    @staticmethod
    def _flatten_targets(target: ast.AST):
        """Yield leaf assignment targets, recursing into tuple/list/star."""
        if isinstance(target, (ast.Tuple, ast.List)):
            for elt in target.elts:
                yield from SideEffectDetector._flatten_targets(elt)
        elif isinstance(target, ast.Starred):
            yield from SideEffectDetector._flatten_targets(target.value)
        else:
            yield target

    def _check_globals(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect global/nonlocal declarations (both land in global_refs)."""
        if isinstance(node, (ast.Global, ast.Nonlocal)):
            info.global_refs.extend(node.names)

    def _check_yield(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect generator functions (yield / yield from)."""
        if isinstance(node, (ast.Yield, ast.YieldFrom)):
            info.has_yield = True

    def _check_delete(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect ``del self.<attr>`` statements."""
        if isinstance(node, ast.Delete):
            for target in node.targets:
                if isinstance(target, ast.Attribute):
                    if isinstance(target.value, ast.Name) and target.value.id == "self":
                        info.self_mutations.append(f"del:{target.attr}")

    # ------------------------------------------------------------------
    # classification
    # ------------------------------------------------------------------
    def _classify(self, info: "SideEffectInfo") -> None:
        """Set info.classification from the collected evidence.

        Priority: IO > cache > mutation > pure.
        """
        if info.io_operations:
            info.classification = "IO"
        elif info.cache_operations:
            info.classification = "cache"
        elif info.self_mutations or info.mutations or info.global_refs:
            info.classification = "mutation"
        else:
            info.classification = "pure"

    def _heuristic_classify(
        self, fi: "FunctionInfo", info: "SideEffectInfo"
    ) -> None:
        """Fallback classification from the function name and known calls."""
        name_lower = fi.name.lower()
        calls_lower = {c.lower() for c in fi.calls}

        io_words = {"write", "read", "open", "save", "load", "export",
                    "dump", "print", "mkdir", "rmdir", "remove"}
        cache_words = {"cache", "memoize", "lru_cache", "store", "fetch"}
        mutation_words = {"set_", "update", "modify", "mutate", "append",
                          "insert", "delete", "fix", "patch"}

        if any(w in name_lower for w in io_words):
            info.classification = "IO"
            info.io_operations.append(f"name:{fi.name}")
        elif any(any(w in c for w in io_words) for c in calls_lower):
            info.classification = "IO"
            info.io_operations.append("calls:IO")
        elif any(w in name_lower for w in cache_words):
            info.classification = "cache"
            info.cache_operations.append(f"name:{fi.name}")
        elif any(any(w in c for w in cache_words) for c in calls_lower):
            info.classification = "cache"
            info.cache_operations.append("calls:cache")
        elif any(w in name_lower for w in mutation_words):
            info.classification = "mutation"
            info.mutations.append(f"name:{fi.name}")
        else:
            info.classification = "pure"

    # ------------------------------------------------------------------
    # AST helpers
    # ------------------------------------------------------------------
    def _get_ast(self, file_path: str) -> Optional[ast.Module]:
        """Parse and cache the AST for a source file (None on failure)."""
        if not file_path:
            return None
        if file_path in self._ast_cache:
            return self._ast_cache[file_path]

        try:
            source = Path(file_path).read_text(encoding="utf-8", errors="replace")
            tree = ast.parse(source, filename=file_path)
            self._ast_cache[file_path] = tree
        except (OSError, SyntaxError) as e:
            logger.debug("Cannot parse %s: %s", file_path, e)
            self._ast_cache[file_path] = None
            tree = None
        return tree

    def _find_function_node(
        self, tree: ast.Module, name: str, line: int
    ) -> Optional[ast.FunctionDef]:
        """Find a (sync or async) function node by name and line number.

        Falls back to a name-only match when the line does not agree.
        """
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if node.name == name and node.lineno == line:
                    return node
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if node.name == name:
                    return node
        return None

    def _get_call_name(self, node: ast.expr) -> Optional[str]:
        """Extract a dotted call name ("a.b.c") from a call's func node."""
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            value = self._get_call_name(node.value)
            if value:
                return f"{value}.{node.attr}"
            return node.attr
        return None
@@ -0,0 +1,192 @@
1
+ """Detection of code smells using analysis metrics."""
2
+ from typing import List, Dict, Any
3
+ from ..core.models import AnalysisResult, CodeSmell
4
+
5
class SmellDetector:
    """Detect code smells from analysis results.

    Each ``_detect_*`` method returns a list of CodeSmell records and
    :meth:`detect` aggregates them onto ``self.result.smells``.
    Set/frozenset-derived data is sorted before rendering so reports are
    deterministic across runs (set iteration order is not).

    Project types (AnalysisResult, CodeSmell) are referenced via string
    annotations so signatures never force their evaluation at
    class-definition time.
    """

    def __init__(self, result: "AnalysisResult"):
        self.result = result

    def detect(self) -> "List[CodeSmell]":
        """Run all detectors, record and return detected code smells."""
        smells = []
        smells.extend(self._detect_god_functions())
        smells.extend(self._detect_god_modules())
        smells.extend(self._detect_feature_envy())
        smells.extend(self._detect_data_clumps())
        smells.extend(self._detect_shotgun_surgery())
        smells.extend(self._detect_bottlenecks())
        smells.extend(self._detect_circular_dependencies())

        self.result.smells = smells
        return smells

    def _detect_god_functions(self) -> "List[CodeSmell]":
        """Detect high fan-out / mutation-heavy / high-complexity functions."""
        smells = []
        for func_name, func_info in self.result.functions.items():
            metrics = self.result.metrics.get(func_name, {})
            fan_out = metrics.get('fan_out', 0)
            mutation_count = len([m for m in self.result.mutations if m.scope == func_name])

            # Cyclomatic complexity as computed by the analyzer.
            # NOTE(review): an upstream comment suggests this may also be
            # exposed under a short 'cc' key — confirm against core.models.
            complexity = func_info.complexity.get('cyclomatic_complexity', 1)

            if fan_out > 10 or mutation_count > 6 or complexity > 12:
                # Weighted blend of the three signals, capped at 1.0.
                severity = (fan_out / 20) * 0.3 + (mutation_count / 15) * 0.3 + (complexity / 30) * 0.4
                severity = min(1.0, severity)

                smells.append(CodeSmell(
                    name=f"God Function: {func_info.name}",
                    type="god_function",
                    file=func_info.file,
                    line=func_info.line,
                    severity=severity,
                    description=f"Function '{func_info.name}' is oversized: CC={complexity}, fan-out={fan_out}, mutations={mutation_count}.",
                    context={"fan_out": fan_out, "mutations": mutation_count, "complexity": complexity, "function": func_name}
                ))
        return smells

    def _detect_god_modules(self) -> "List[CodeSmell]":
        """Detect oversized modules/packages."""
        smells = []
        for mod_name, mod in self.result.modules.items():
            f_count = len(mod.functions)
            c_count = len(mod.classes)

            if f_count > 40 or c_count > 10:
                severity = (f_count / 100) * 0.5 + (c_count / 25) * 0.5
                severity = min(1.0, severity)

                smells.append(CodeSmell(
                    name=f"God Module: {mod_name}",
                    # Deliberately reuses the god_function type so the same
                    # refactoring template applies; a dedicated god_module
                    # template could replace this.
                    type="god_function",
                    file=mod.file,
                    line=1,
                    severity=severity,
                    description=f"Module '{mod_name}' is too large ({f_count} functions, {c_count} classes). Consider splitting into sub-modules.",
                    context={"functions": f_count, "classes": c_count}
                ))
        return smells

    def _detect_feature_envy(self) -> "List[CodeSmell]":
        """Detect functions that mutate other modules' state more than their own."""
        smells = []
        # Simplified: look for functions mutating many variables in OTHER modules
        for func_name, func_info in self.result.functions.items():
            own_module = func_name.split('.')[0]
            foreign_mutations = []

            for mutation in self.result.mutations:
                if mutation.scope == func_name:
                    if '.' in mutation.variable:
                        origin_mod = mutation.variable.split('.')[0]
                        if origin_mod != own_module:
                            foreign_mutations.append(mutation.variable)

            # Sorted for deterministic report text (sets have no order).
            unique_foreign = sorted(set(foreign_mutations))
            if len(unique_foreign) >= 3:
                smells.append(CodeSmell(
                    name=f"Feature Envy: {func_info.name}",
                    type="feature_envy",
                    file=func_info.file,
                    line=func_info.line,
                    severity=0.7,
                    description=f"Function '{func_info.name}' mutates multiple variables in other modules: {', '.join(unique_foreign)}.",
                    context={"foreign_mutations": unique_foreign}
                ))
        return smells

    def _detect_data_clumps(self) -> "List[CodeSmell]":
        """Detect 3+ arguments that travel together across functions."""
        smells = []
        # Simplified: group functions sharing the same 3+ argument names.
        arg_sets = {}  # frozenset(args) -> List[func_names]
        for func_name, func_info in self.result.functions.items():
            if len(func_info.args) >= 3:
                args = frozenset(func_info.args)
                arg_sets.setdefault(args, []).append(func_name)

        for args, funcs in arg_sets.items():
            if len(funcs) >= 2:
                # Sorted for deterministic smell names/descriptions.
                clump = sorted(args)
                for func_name in funcs:
                    func_info = self.result.functions[func_name]
                    smells.append(CodeSmell(
                        name=f"Data Clump: {', '.join(clump)}",
                        type="data_clump",
                        file=func_info.file,
                        line=func_info.line,
                        severity=0.6,
                        description=f"Arguments ({', '.join(clump)}) are used together in multiple functions: {', '.join(funcs)}.",
                        context={"clump": clump, "related_functions": funcs}
                    ))
        return smells

    def _detect_shotgun_surgery(self) -> "List[CodeSmell]":
        """Detect variables mutated from many different functions."""
        smells = []
        var_mutators = {}  # variable -> set(functions)

        for mutation in self.result.mutations:
            var_mutators.setdefault(mutation.variable, set()).add(mutation.scope)

        for var, funcs in var_mutators.items():
            if len(funcs) < 5:
                continue
            affected = sorted(funcs)
            # Report at the first mutator we can resolve.  Previously an
            # arbitrary set element was used, and the whole smell was
            # dropped when that one function happened to be unknown.
            func_info = next(
                (self.result.functions[f] for f in affected if f in self.result.functions),
                None,
            )
            if func_info is None:
                continue

            smells.append(CodeSmell(
                name=f"Shotgun Surgery: {var}",
                type="shotgun_surgery",
                file=func_info.file,
                line=func_info.line,
                severity=0.8,
                description=f"Mutation of variable '{var}' spans {len(funcs)} functions. Changing this logic requires work in many places.",
                context={"variable": var, "affected_functions": affected}
            ))
        return smells

    def _detect_bottlenecks(self) -> "List[CodeSmell]":
        """Detect functions with high betweenness centrality."""
        smells = []
        # Central functions that many independent call paths traverse.
        for func_name, func_info in self.result.functions.items():
            if func_info.centrality > 0.1:  # Heuristic threshold
                smells.append(CodeSmell(
                    name=f"Structural Bottleneck: {func_info.name}",
                    type="bottleneck",
                    file=func_info.file,
                    line=func_info.line,
                    severity=min(1.0, func_info.centrality * 5),
                    description=f"Function '{func_info.name}' is a structural bottleneck (centrality={round(func_info.centrality, 3)}). Significant logic flows through this function.",
                    context={"centrality": func_info.centrality}
                ))
        return smells

    def _detect_circular_dependencies(self) -> "List[CodeSmell]":
        """Detect circular dependencies reported by the call-graph metrics."""
        smells = []
        cycles = self.result.metrics.get("project", {}).get("circular_dependencies", [])

        for cycle in cycles:
            if len(cycle) >= 2:
                # Report on the first function in the cycle.
                func_name = cycle[0]
                func_info = self.result.functions.get(func_name)
                if not func_info:
                    continue

                smells.append(CodeSmell(
                    name=f"Circular Dependency: {' -> '.join(cycle)}",
                    type="circular_dependency",
                    file=func_info.file,
                    line=func_info.line,
                    severity=0.8,
                    description=f"Circular dependency detected: {' -> '.join(cycle)}. This indicates high coupling and may lead to infinite recursion or initialization issues.",
                    context={"cycle": cycle}
                ))
        return smells