code2llm 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. code2flow/__init__.py +47 -0
  2. code2flow/__main__.py +6 -0
  3. code2flow/analysis/__init__.py +23 -0
  4. code2flow/analysis/call_graph.py +210 -0
  5. code2flow/analysis/cfg.py +293 -0
  6. code2flow/analysis/coupling.py +77 -0
  7. code2flow/analysis/data_analysis.py +249 -0
  8. code2flow/analysis/dfg.py +224 -0
  9. code2flow/analysis/pipeline_detector.py +445 -0
  10. code2flow/analysis/side_effects.py +313 -0
  11. code2flow/analysis/smells.py +192 -0
  12. code2flow/analysis/type_inference.py +306 -0
  13. code2flow/cli.py +493 -0
  14. code2flow/core/__init__.py +36 -0
  15. code2flow/core/analyzer.py +765 -0
  16. code2flow/core/config.py +177 -0
  17. code2flow/core/models.py +194 -0
  18. code2flow/core/streaming_analyzer.py +666 -0
  19. code2flow/exporters/__init__.py +35 -0
  20. code2flow/exporters/base.py +13 -0
  21. code2flow/exporters/context_exporter.py +207 -0
  22. code2flow/exporters/flow_exporter.py +570 -0
  23. code2flow/exporters/json_exporter.py +17 -0
  24. code2flow/exporters/llm_exporter.py +12 -0
  25. code2flow/exporters/map_exporter.py +218 -0
  26. code2flow/exporters/mermaid_exporter.py +67 -0
  27. code2flow/exporters/toon.py +982 -0
  28. code2flow/exporters/yaml_exporter.py +108 -0
  29. code2flow/llm_flow_generator.py +451 -0
  30. code2flow/llm_task_generator.py +263 -0
  31. code2flow/mermaid_generator.py +481 -0
  32. code2flow/nlp/__init__.py +23 -0
  33. code2flow/nlp/config.py +174 -0
  34. code2flow/nlp/entity_resolution.py +326 -0
  35. code2flow/nlp/intent_matching.py +297 -0
  36. code2flow/nlp/normalization.py +122 -0
  37. code2flow/nlp/pipeline.py +388 -0
  38. code2flow/patterns/__init__.py +0 -0
  39. code2flow/patterns/detector.py +168 -0
  40. code2flow/refactor/__init__.py +0 -0
  41. code2flow/refactor/prompt_engine.py +150 -0
  42. code2flow/visualizers/__init__.py +0 -0
  43. code2flow/visualizers/graph.py +196 -0
  44. code2llm-0.3.7.dist-info/METADATA +604 -0
  45. code2llm-0.3.7.dist-info/RECORD +49 -0
  46. code2llm-0.3.7.dist-info/WHEEL +5 -0
  47. code2llm-0.3.7.dist-info/entry_points.txt +2 -0
  48. code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
  49. code2llm-0.3.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,313 @@
1
+ """Side-effect detector — AST-based side-effect classification.
2
+
3
+ Scans Python function bodies to detect:
4
+ - IO: open(), read(), write(), print(), file operations
5
+ - Cache: cache lookups/stores, memoization, lru_cache
6
+ - Mutation: self.x = ..., global, del, list.append/insert
7
+ - Pure: no detected side effects
8
+
9
+ Used by FlowExporter to enrich CONTRACTS and SIDE_EFFECTS sections.
10
+ """
11
+
12
+ import ast
13
+ import logging
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional, Set
16
+
17
+ from ..core.models import FunctionInfo
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
# Side-effect classification patterns
#
# These frozensets drive SideEffectDetector._check_calls.  Membership is
# tested against the *last* dotted component of the call name (e.g. the
# "write" in "fh.write"), except CACHE_INDICATORS, which is matched as a
# substring of the full dotted call name.

# Function/method names whose call is treated as input/output.  Includes
# generic verbs ("get", "execute"), so some false positives are accepted
# by design.
IO_CALLS = frozenset({
    "open", "read", "write", "print", "input",
    "mkdir", "makedirs", "rmdir", "remove", "unlink", "rename",
    "read_text", "write_text", "read_bytes", "write_bytes",
    "dump", "dumps", "load", "loads",
    "save", "savefig",
    "send", "recv", "connect", "listen", "accept",
    "get", "post", "put", "delete", "patch",  # HTTP
    "execute", "commit", "rollback",  # DB
})

# Method names typical of file-/socket-like objects; when one matches,
# the full dotted call name (not just the method) is recorded.
IO_ATTRIBUTES = frozenset({
    "write", "read", "readline", "readlines", "writelines",
    "flush", "close", "seek", "tell",
    "send", "recv", "sendall",
})

# Substrings that mark a call as cache-related (matched anywhere in the
# dotted call name, hence both lower- and CamelCase variants).
CACHE_INDICATORS = frozenset({
    "cache", "lru_cache", "memoize", "cached_property",
    "Cache", "FileCache",
})

# Exact cache-API call names.
CACHE_CALLS = frozenset({
    "cache_get", "cache_set", "cache_delete", "cache_clear",
    "get_cached", "set_cached",
})

# In-place container methods; only counted as mutations when called on a
# receiver (dotted call), see _check_calls.
MUTATION_CALLS = frozenset({
    "append", "extend", "insert", "pop", "remove", "clear",
    "update", "setdefault", "add", "discard",
    "sort", "reverse",
})
54
+
55
+
56
class SideEffectInfo:
    """Side-effect findings for a single analyzed function."""

    __slots__ = (
        "function_name", "qualified_name", "classification",
        "io_operations", "cache_operations", "mutations",
        "global_refs", "self_mutations", "has_yield",
    )

    def __init__(self, function_name: str, qualified_name: str):
        self.function_name = function_name
        self.qualified_name = qualified_name
        # One of: "pure" | "IO" | "cache" | "mutation".
        self.classification: str = "pure"
        self.io_operations: List[str] = []
        self.cache_operations: List[str] = []
        self.mutations: List[str] = []
        self.global_refs: List[str] = []
        self.self_mutations: List[str] = []
        self.has_yield: bool = False

    @property
    def is_pure(self) -> bool:
        """True when no side effect was detected."""
        return self.classification == "pure"

    @property
    def side_effect_summary(self) -> str:
        """One-line, human-readable summary of the detected effects."""
        pieces: List[str] = []
        io_ops = self.io_operations
        if io_ops:
            pieces.append("IO(%s)" % ", ".join(io_ops[:3]))
        cache_ops = self.cache_operations
        if cache_ops:
            pieces.append("cache(%s)" % ", ".join(cache_ops[:2]))
        attr_writes = self.self_mutations
        if attr_writes:
            # Each attribute is rendered as "self.<attr>".
            pieces.append("mutates " + ", ".join("self." + a for a in attr_writes[:3]))
        globs = self.global_refs
        if globs:
            pieces.append("global(%s)" % ", ".join(globs[:2]))
        if self.has_yield:
            pieces.append("generator")
        if not pieces:
            return "pure"
        return "; ".join(pieces)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the findings to a plain dict (e.g. for JSON export)."""
        return dict(
            function=self.function_name,
            qualified_name=self.qualified_name,
            classification=self.classification,
            io=self.io_operations,
            cache=self.cache_operations,
            mutations=self.mutations,
            globals=self.global_refs,
            self_mutations=self.self_mutations,
            has_yield=self.has_yield,
            summary=self.side_effect_summary,
        )
109
+
110
+
111
class SideEffectDetector:
    """Detect side effects in Python functions via AST analysis.

    For each function the detector parses the enclosing source file
    (cached per file), locates the function's AST node, and scans its
    body for IO calls, cache usage, container mutations, global/nonlocal
    references and writes to ``self`` attributes.  When the source file
    cannot be parsed, a name/call-based heuristic is used instead.

    Project types (FunctionInfo, SideEffectInfo) are referenced via
    string annotations so signatures never force their evaluation at
    class-definition time.
    """

    def __init__(self):
        # Per-file parse cache; a failed parse is stored as None so each
        # file is read at most once.
        self._ast_cache: Dict[str, Optional[ast.Module]] = {}

    def analyze_function(self, fi: "FunctionInfo") -> "SideEffectInfo":
        """Analyze a single function and return its side-effect profile."""
        info = SideEffectInfo(fi.name, fi.qualified_name)

        tree = self._get_ast(fi.file)
        if tree:
            node = self._find_function_node(tree, fi.name, fi.line)
            if node:
                self._scan_node(node, info)
                self._classify(info)
                return info

        # Fallback: heuristic from function name and recorded calls
        self._heuristic_classify(fi, info)
        return info

    def analyze_all(
        self, funcs: "Dict[str, FunctionInfo]"
    ) -> "Dict[str, SideEffectInfo]":
        """Batch-analyze all functions, keyed by qualified name."""
        results = {}
        for qname, fi in funcs.items():
            results[qname] = self.analyze_function(fi)
        return results

    def get_purity_score(self, fi: "FunctionInfo") -> str:
        """Return the purity classification: pure | IO | cache | mutation."""
        return self.analyze_function(fi).classification

    # ------------------------------------------------------------------
    # AST scanning
    # ------------------------------------------------------------------
    def _scan_node(self, func_node: ast.FunctionDef, info: "SideEffectInfo") -> None:
        """Walk the function body and record every side-effect pattern.

        NOTE: ast.walk also descends into nested defs/lambdas, so effects
        of inner functions are attributed to the enclosing function.
        """
        for node in ast.walk(func_node):
            self._check_calls(node, info)
            self._check_assignments(node, info)
            self._check_globals(node, info)
            self._check_yield(node, info)
            self._check_delete(node, info)

    def _check_calls(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect IO calls, cache calls and mutating method calls."""
        if not isinstance(node, ast.Call):
            return

        call_name = self._get_call_name(node.func)
        if not call_name:
            return

        parts = call_name.split(".")
        base_name = parts[-1]

        # IO detection: known IO call names first, then file-like methods.
        if base_name in IO_CALLS:
            info.io_operations.append(base_name)
        elif base_name in IO_ATTRIBUTES:
            info.io_operations.append(call_name)

        # Cache detection: exact API names, then substring indicators.
        if base_name in CACHE_CALLS:
            info.cache_operations.append(base_name)
        elif any(ci in call_name for ci in CACHE_INDICATORS):
            info.cache_operations.append(call_name)

        # Mutation via method calls (e.g. items.append); a receiver is
        # required so bare append()-style helpers are not misclassified.
        if base_name in MUTATION_CALLS and len(parts) >= 2:
            info.mutations.append(call_name)

    def _check_assignments(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect writes to ``self`` attributes (self.x = / += / : T = ...)."""
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AugAssign):
            targets = [node.target]
        elif isinstance(node, ast.AnnAssign):
            # A bare annotation (``self.x: int`` with no value) declares a
            # type without assigning anything, so it is not a mutation.
            if node.value is None:
                return
            targets = [node.target]
        else:
            return

        for target in targets:
            # Unpack tuple/list/starred patterns so targets like
            # ``self.a, self.b = ...`` are detected as well.
            for leaf in self._flatten_targets(target):
                if isinstance(leaf, ast.Attribute):
                    if isinstance(leaf.value, ast.Name) and leaf.value.id == "self":
                        info.self_mutations.append(leaf.attr)

    @staticmethod
    def _flatten_targets(target: ast.AST):
        """Yield leaf assignment targets, recursing into tuple/list/star."""
        if isinstance(target, (ast.Tuple, ast.List)):
            for elt in target.elts:
                yield from SideEffectDetector._flatten_targets(elt)
        elif isinstance(target, ast.Starred):
            yield from SideEffectDetector._flatten_targets(target.value)
        else:
            yield target

    def _check_globals(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect global/nonlocal declarations (both land in global_refs)."""
        if isinstance(node, (ast.Global, ast.Nonlocal)):
            info.global_refs.extend(node.names)

    def _check_yield(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect generator functions (yield / yield from)."""
        if isinstance(node, (ast.Yield, ast.YieldFrom)):
            info.has_yield = True

    def _check_delete(self, node: ast.AST, info: "SideEffectInfo") -> None:
        """Detect ``del self.<attr>`` statements."""
        if isinstance(node, ast.Delete):
            for target in node.targets:
                if isinstance(target, ast.Attribute):
                    if isinstance(target.value, ast.Name) and target.value.id == "self":
                        info.self_mutations.append(f"del:{target.attr}")

    # ------------------------------------------------------------------
    # classification
    # ------------------------------------------------------------------
    def _classify(self, info: "SideEffectInfo") -> None:
        """Set info.classification from the collected evidence.

        Priority: IO > cache > mutation > pure.
        """
        if info.io_operations:
            info.classification = "IO"
        elif info.cache_operations:
            info.classification = "cache"
        elif info.self_mutations or info.mutations or info.global_refs:
            info.classification = "mutation"
        else:
            info.classification = "pure"

    def _heuristic_classify(
        self, fi: "FunctionInfo", info: "SideEffectInfo"
    ) -> None:
        """Fallback classification from the function name and known calls."""
        name_lower = fi.name.lower()
        calls_lower = {c.lower() for c in fi.calls}

        io_words = {"write", "read", "open", "save", "load", "export",
                    "dump", "print", "mkdir", "rmdir", "remove"}
        cache_words = {"cache", "memoize", "lru_cache", "store", "fetch"}
        mutation_words = {"set_", "update", "modify", "mutate", "append",
                          "insert", "delete", "fix", "patch"}

        if any(w in name_lower for w in io_words):
            info.classification = "IO"
            info.io_operations.append(f"name:{fi.name}")
        elif any(any(w in c for w in io_words) for c in calls_lower):
            info.classification = "IO"
            info.io_operations.append("calls:IO")
        elif any(w in name_lower for w in cache_words):
            info.classification = "cache"
            info.cache_operations.append(f"name:{fi.name}")
        elif any(any(w in c for w in cache_words) for c in calls_lower):
            info.classification = "cache"
            info.cache_operations.append("calls:cache")
        elif any(w in name_lower for w in mutation_words):
            info.classification = "mutation"
            info.mutations.append(f"name:{fi.name}")
        else:
            info.classification = "pure"

    # ------------------------------------------------------------------
    # AST helpers
    # ------------------------------------------------------------------
    def _get_ast(self, file_path: str) -> Optional[ast.Module]:
        """Parse and cache the AST for a source file (None on failure)."""
        if not file_path:
            return None
        if file_path in self._ast_cache:
            return self._ast_cache[file_path]

        try:
            source = Path(file_path).read_text(encoding="utf-8", errors="replace")
            tree = ast.parse(source, filename=file_path)
            self._ast_cache[file_path] = tree
        except (OSError, SyntaxError) as e:
            logger.debug("Cannot parse %s: %s", file_path, e)
            self._ast_cache[file_path] = None
            tree = None
        return tree

    def _find_function_node(
        self, tree: ast.Module, name: str, line: int
    ) -> Optional[ast.FunctionDef]:
        """Find a (sync or async) function node by name and line number.

        Falls back to a name-only match when the line does not agree.
        """
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if node.name == name and node.lineno == line:
                    return node
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if node.name == name:
                    return node
        return None

    def _get_call_name(self, node: ast.expr) -> Optional[str]:
        """Extract a dotted call name ("a.b.c") from a call's func node."""
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            value = self._get_call_name(node.value)
            if value:
                return f"{value}.{node.attr}"
            return node.attr
        return None
@@ -0,0 +1,192 @@
1
+ """Detection of code smells using analysis metrics."""
2
+ from typing import List, Dict, Any
3
+ from ..core.models import AnalysisResult, CodeSmell
4
+
5
class SmellDetector:
    """Detect code smells from analysis results.

    Each ``_detect_*`` method returns a list of CodeSmell records and
    :meth:`detect` aggregates them onto ``self.result.smells``.
    Set/frozenset-derived data is sorted before rendering so reports are
    deterministic across runs (set iteration order is not).

    Project types (AnalysisResult, CodeSmell) are referenced via string
    annotations so signatures never force their evaluation at
    class-definition time.
    """

    def __init__(self, result: "AnalysisResult"):
        self.result = result

    def detect(self) -> "List[CodeSmell]":
        """Run all detectors, record and return detected code smells."""
        smells = []
        smells.extend(self._detect_god_functions())
        smells.extend(self._detect_god_modules())
        smells.extend(self._detect_feature_envy())
        smells.extend(self._detect_data_clumps())
        smells.extend(self._detect_shotgun_surgery())
        smells.extend(self._detect_bottlenecks())
        smells.extend(self._detect_circular_dependencies())

        self.result.smells = smells
        return smells

    def _detect_god_functions(self) -> "List[CodeSmell]":
        """Detect high fan-out / mutation-heavy / high-complexity functions."""
        smells = []
        for func_name, func_info in self.result.functions.items():
            metrics = self.result.metrics.get(func_name, {})
            fan_out = metrics.get('fan_out', 0)
            mutation_count = len([m for m in self.result.mutations if m.scope == func_name])

            # Cyclomatic complexity as computed by the analyzer.
            # NOTE(review): an upstream comment suggests this may also be
            # exposed under a short 'cc' key — confirm against core.models.
            complexity = func_info.complexity.get('cyclomatic_complexity', 1)

            if fan_out > 10 or mutation_count > 6 or complexity > 12:
                # Weighted blend of the three signals, capped at 1.0.
                severity = (fan_out / 20) * 0.3 + (mutation_count / 15) * 0.3 + (complexity / 30) * 0.4
                severity = min(1.0, severity)

                smells.append(CodeSmell(
                    name=f"God Function: {func_info.name}",
                    type="god_function",
                    file=func_info.file,
                    line=func_info.line,
                    severity=severity,
                    description=f"Function '{func_info.name}' is oversized: CC={complexity}, fan-out={fan_out}, mutations={mutation_count}.",
                    context={"fan_out": fan_out, "mutations": mutation_count, "complexity": complexity, "function": func_name}
                ))
        return smells

    def _detect_god_modules(self) -> "List[CodeSmell]":
        """Detect oversized modules/packages."""
        smells = []
        for mod_name, mod in self.result.modules.items():
            f_count = len(mod.functions)
            c_count = len(mod.classes)

            if f_count > 40 or c_count > 10:
                severity = (f_count / 100) * 0.5 + (c_count / 25) * 0.5
                severity = min(1.0, severity)

                smells.append(CodeSmell(
                    name=f"God Module: {mod_name}",
                    # Deliberately reuses the god_function type so the same
                    # refactoring template applies; a dedicated god_module
                    # template could replace this.
                    type="god_function",
                    file=mod.file,
                    line=1,
                    severity=severity,
                    description=f"Module '{mod_name}' is too large ({f_count} functions, {c_count} classes). Consider splitting into sub-modules.",
                    context={"functions": f_count, "classes": c_count}
                ))
        return smells

    def _detect_feature_envy(self) -> "List[CodeSmell]":
        """Detect functions that mutate other modules' state more than their own."""
        smells = []
        # Simplified: look for functions mutating many variables in OTHER modules
        for func_name, func_info in self.result.functions.items():
            own_module = func_name.split('.')[0]
            foreign_mutations = []

            for mutation in self.result.mutations:
                if mutation.scope == func_name:
                    if '.' in mutation.variable:
                        origin_mod = mutation.variable.split('.')[0]
                        if origin_mod != own_module:
                            foreign_mutations.append(mutation.variable)

            # Sorted for deterministic report text (sets have no order).
            unique_foreign = sorted(set(foreign_mutations))
            if len(unique_foreign) >= 3:
                smells.append(CodeSmell(
                    name=f"Feature Envy: {func_info.name}",
                    type="feature_envy",
                    file=func_info.file,
                    line=func_info.line,
                    severity=0.7,
                    description=f"Function '{func_info.name}' mutates multiple variables in other modules: {', '.join(unique_foreign)}.",
                    context={"foreign_mutations": unique_foreign}
                ))
        return smells

    def _detect_data_clumps(self) -> "List[CodeSmell]":
        """Detect 3+ arguments that travel together across functions."""
        smells = []
        # Simplified: group functions sharing the same 3+ argument names.
        arg_sets = {}  # frozenset(args) -> List[func_names]
        for func_name, func_info in self.result.functions.items():
            if len(func_info.args) >= 3:
                args = frozenset(func_info.args)
                arg_sets.setdefault(args, []).append(func_name)

        for args, funcs in arg_sets.items():
            if len(funcs) >= 2:
                # Sorted for deterministic smell names/descriptions.
                clump = sorted(args)
                for func_name in funcs:
                    func_info = self.result.functions[func_name]
                    smells.append(CodeSmell(
                        name=f"Data Clump: {', '.join(clump)}",
                        type="data_clump",
                        file=func_info.file,
                        line=func_info.line,
                        severity=0.6,
                        description=f"Arguments ({', '.join(clump)}) are used together in multiple functions: {', '.join(funcs)}.",
                        context={"clump": clump, "related_functions": funcs}
                    ))
        return smells

    def _detect_shotgun_surgery(self) -> "List[CodeSmell]":
        """Detect variables mutated from many different functions."""
        smells = []
        var_mutators = {}  # variable -> set(functions)

        for mutation in self.result.mutations:
            var_mutators.setdefault(mutation.variable, set()).add(mutation.scope)

        for var, funcs in var_mutators.items():
            if len(funcs) < 5:
                continue
            affected = sorted(funcs)
            # Report at the first mutator we can resolve.  Previously an
            # arbitrary set element was used, and the whole smell was
            # dropped when that one function happened to be unknown.
            func_info = next(
                (self.result.functions[f] for f in affected if f in self.result.functions),
                None,
            )
            if func_info is None:
                continue

            smells.append(CodeSmell(
                name=f"Shotgun Surgery: {var}",
                type="shotgun_surgery",
                file=func_info.file,
                line=func_info.line,
                severity=0.8,
                description=f"Mutation of variable '{var}' spans {len(funcs)} functions. Changing this logic requires work in many places.",
                context={"variable": var, "affected_functions": affected}
            ))
        return smells

    def _detect_bottlenecks(self) -> "List[CodeSmell]":
        """Detect functions with high betweenness centrality."""
        smells = []
        # Central functions that many independent call paths traverse.
        for func_name, func_info in self.result.functions.items():
            if func_info.centrality > 0.1:  # Heuristic threshold
                smells.append(CodeSmell(
                    name=f"Structural Bottleneck: {func_info.name}",
                    type="bottleneck",
                    file=func_info.file,
                    line=func_info.line,
                    severity=min(1.0, func_info.centrality * 5),
                    description=f"Function '{func_info.name}' is a structural bottleneck (centrality={round(func_info.centrality, 3)}). Significant logic flows through this function.",
                    context={"centrality": func_info.centrality}
                ))
        return smells

    def _detect_circular_dependencies(self) -> "List[CodeSmell]":
        """Detect circular dependencies reported by the call-graph metrics."""
        smells = []
        cycles = self.result.metrics.get("project", {}).get("circular_dependencies", [])

        for cycle in cycles:
            if len(cycle) >= 2:
                # Report on the first function in the cycle.
                func_name = cycle[0]
                func_info = self.result.functions.get(func_name)
                if not func_info:
                    continue

                smells.append(CodeSmell(
                    name=f"Circular Dependency: {' -> '.join(cycle)}",
                    type="circular_dependency",
                    file=func_info.file,
                    line=func_info.line,
                    severity=0.8,
                    description=f"Circular dependency detected: {' -> '.join(cycle)}. This indicates high coupling and may lead to infinite recursion or initialization issues.",
                    context={"cycle": cycle}
                ))
        return smells