code2flow-toon 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2flow/__init__.py +47 -0
- code2flow/__main__.py +6 -0
- code2flow/analysis/__init__.py +17 -0
- code2flow/analysis/call_graph.py +210 -0
- code2flow/analysis/cfg.py +293 -0
- code2flow/analysis/coupling.py +77 -0
- code2flow/analysis/data_analysis.py +249 -0
- code2flow/analysis/dfg.py +224 -0
- code2flow/analysis/smells.py +192 -0
- code2flow/cli.py +464 -0
- code2flow/core/__init__.py +36 -0
- code2flow/core/analyzer.py +765 -0
- code2flow/core/config.py +177 -0
- code2flow/core/models.py +194 -0
- code2flow/core/streaming_analyzer.py +666 -0
- code2flow/exporters/__init__.py +17 -0
- code2flow/exporters/base.py +13 -0
- code2flow/exporters/json_exporter.py +17 -0
- code2flow/exporters/llm_exporter.py +199 -0
- code2flow/exporters/mermaid_exporter.py +67 -0
- code2flow/exporters/toon.py +401 -0
- code2flow/exporters/yaml_exporter.py +108 -0
- code2flow/llm_flow_generator.py +451 -0
- code2flow/llm_task_generator.py +263 -0
- code2flow/mermaid_generator.py +481 -0
- code2flow/nlp/__init__.py +23 -0
- code2flow/nlp/config.py +174 -0
- code2flow/nlp/entity_resolution.py +326 -0
- code2flow/nlp/intent_matching.py +297 -0
- code2flow/nlp/normalization.py +122 -0
- code2flow/nlp/pipeline.py +388 -0
- code2flow/patterns/__init__.py +0 -0
- code2flow/patterns/detector.py +168 -0
- code2flow/refactor/__init__.py +0 -0
- code2flow/refactor/prompt_engine.py +150 -0
- code2flow/visualizers/__init__.py +0 -0
- code2flow/visualizers/graph.py +196 -0
- code2flow_toon-0.2.4.dist-info/METADATA +599 -0
- code2flow_toon-0.2.4.dist-info/RECORD +43 -0
- code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
- code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
- code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
- code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
code2flow/__init__.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
code2flow - Optimized Python Code Flow Analysis Tool
|
|
3
|
+
|
|
4
|
+
A high-performance tool for analyzing Python code control flow, data flow,
|
|
5
|
+
and call graphs with caching and parallel processing.
|
|
6
|
+
|
|
7
|
+
Includes NLP Processing Pipeline for query normalization, intent matching,
|
|
8
|
+
and entity resolution with multilingual support.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__version__ = "0.2.4"
|
|
12
|
+
__author__ = "STTS Project"
|
|
13
|
+
|
|
14
|
+
# Core analysis components
|
|
15
|
+
from .core.analyzer import ProjectAnalyzer
|
|
16
|
+
from .core.config import Config, FAST_CONFIG
|
|
17
|
+
from .core.models import AnalysisResult, FunctionInfo, ClassInfo, Pattern
|
|
18
|
+
|
|
19
|
+
# NLP Processing Pipeline
|
|
20
|
+
from .nlp import (
|
|
21
|
+
NLPPipeline,
|
|
22
|
+
QueryNormalizer,
|
|
23
|
+
IntentMatcher,
|
|
24
|
+
EntityResolver,
|
|
25
|
+
NLPConfig,
|
|
26
|
+
FAST_NLP_CONFIG,
|
|
27
|
+
PRECISE_NLP_CONFIG,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
# Core
|
|
32
|
+
"ProjectAnalyzer",
|
|
33
|
+
"Config",
|
|
34
|
+
"FAST_CONFIG",
|
|
35
|
+
"AnalysisResult",
|
|
36
|
+
"FunctionInfo",
|
|
37
|
+
"ClassInfo",
|
|
38
|
+
"Pattern",
|
|
39
|
+
# NLP Pipeline
|
|
40
|
+
"NLPPipeline",
|
|
41
|
+
"QueryNormalizer",
|
|
42
|
+
"IntentMatcher",
|
|
43
|
+
"EntityResolver",
|
|
44
|
+
"NLPConfig",
|
|
45
|
+
"FAST_NLP_CONFIG",
|
|
46
|
+
"PRECISE_NLP_CONFIG",
|
|
47
|
+
]
|
code2flow/analysis/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Analysis package for code2flow."""
|
|
2
|
+
|
|
3
|
+
from .cfg import CFGExtractor
|
|
4
|
+
from .dfg import DFGExtractor
|
|
5
|
+
from .call_graph import CallGraphExtractor
|
|
6
|
+
from .coupling import CouplingAnalyzer
|
|
7
|
+
from .smells import SmellDetector
|
|
8
|
+
from .data_analysis import DataAnalyzer
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
'CFGExtractor',
|
|
12
|
+
'DFGExtractor',
|
|
13
|
+
'CallGraphExtractor',
|
|
14
|
+
'CouplingAnalyzer',
|
|
15
|
+
'SmellDetector',
|
|
16
|
+
'DataAnalyzer'
|
|
17
|
+
]
|
|
code2flow/analysis/call_graph.py
ADDED
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Call graph extractor using AST."""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from typing import Optional, Set, List, Dict
|
|
5
|
+
import astroid
|
|
6
|
+
|
|
7
|
+
from ..core.config import Config
|
|
8
|
+
from ..core.models import AnalysisResult, FlowEdge
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CallGraphExtractor(ast.NodeVisitor):
    """Extract a call graph (caller -> callee edges) from a Python AST.

    Walks the module tree, recording imports, class definitions and the
    calls made inside each function.  Call targets are resolved first with
    a purely syntactic heuristic (``_resolve_call``); when that fails and an
    astroid tree is available, astroid inference is used as a fallback.
    """

    def __init__(self, config: "Config"):
        self.config = config
        self.result = AnalysisResult()
        self.module_name = ""
        self.file_path = ""

        # Traversal context.
        self.function_stack = []  # qualified names of enclosing functions
        self.class_stack = []     # names of enclosing classes
        self.imports = {}         # local alias -> fully qualified name
        self.astroid_tree = None  # optional astroid module for inference

    def extract(self, tree: ast.AST, module_name: str, file_path: str) -> "AnalysisResult":
        """Extract the call graph from *tree* and return a fresh result."""
        self.result = AnalysisResult()
        self.module_name = module_name
        self.file_path = file_path
        self.function_stack = []
        self.class_stack = []
        self.imports = {}

        # Astroid gives better call-target resolution; fall back silently
        # to the purely syntactic resolver if the file cannot be loaded.
        try:
            self.astroid_tree = astroid.MANAGER.ast_from_file(file_path)
        except Exception:
            self.astroid_tree = None

        self.visit(tree)
        self._calculate_metrics()
        return self.result

    def _calculate_metrics(self):
        """Calculate fan-in and fan-out metrics for every known function."""
        # First, populate called_by for all functions we saw defined here.
        for caller_name, caller_info in self.result.functions.items():
            for callee_name in caller_info.calls:
                if callee_name in self.result.functions:
                    self.result.functions[callee_name].called_by.append(caller_name)

        # Then derive the metrics (sets deduplicate repeated calls).
        for func_name, func_info in self.result.functions.items():
            self.result.metrics[func_name] = {
                "fan_in": len(set(func_info.called_by)),
                "fan_out": len(set(func_info.calls)),
                # Complexity is computed elsewhere; default to 1 if absent.
                "complexity": getattr(func_info, 'complexity', 1)
            }

    def visit_Import(self, node: ast.Import):
        """Track ``import x [as y]`` so later calls can be resolved."""
        for alias in node.names:
            name = alias.asname if alias.asname else alias.name
            self.imports[name] = alias.name
            self.result.imports[name] = alias.name

    def visit_ImportFrom(self, node: ast.ImportFrom):
        """Track ``from m import x [as y]`` with the fully dotted target."""
        module = node.module or ""
        for alias in node.names:
            name = alias.asname if alias.asname else alias.name
            full_name = f"{module}.{alias.name}" if module else alias.name
            self.imports[name] = full_name
            self.result.imports[name] = full_name

    def visit_ClassDef(self, node: ast.ClassDef):
        """Visit a class definition and record its shape."""
        self.class_stack.append(node.name)

        # Include async methods too (a plain ast.FunctionDef check
        # would silently miss them).
        self.result.classes[node.name] = {
            'file': self.file_path,
            'line': node.lineno,
            'methods': [m.name for m in node.body
                        if isinstance(m, (ast.FunctionDef, ast.AsyncFunctionDef))],
            'bases': [self._expr_to_str(b) for b in node.bases]
        }

        for stmt in node.body:
            self.visit(stmt)

        self.class_stack.pop()

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Visit a function definition and track calls within it."""
        func_name = self._qualified_name(node.name)
        self.function_stack.append(func_name)

        # Visit the body to find calls.
        for stmt in node.body:
            self.visit(stmt)

        self.function_stack.pop()

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Visit an async function (identical handling to sync)."""
        self.visit_FunctionDef(node)

    def visit_Call(self, node: ast.Call):
        """Record a call made inside the current function, if any."""
        if not self.function_stack:
            # Module-level calls are not attributed to any function.
            self.generic_visit(node)
            return

        caller = self.function_stack[-1]
        callee = self._resolve_call(node.func)

        # If syntactic resolution failed (or produced a "None." artifact),
        # fall back to astroid inference when available.
        if (not callee or 'None.' in callee) and self.astroid_tree:
            astroid_callee = self._resolve_with_astroid(node)
            if astroid_callee:
                callee = astroid_callee

        if callee and caller in self.result.functions:
            self.result.functions[caller].calls.append(callee)

            # Create a call edge; node ids are resolved later.
            edge = FlowEdge(
                source=-1,
                target=-1,
                edge_type="call",
                metadata={'caller': caller, 'callee': callee}
            )
            self.result.call_edges.append(edge)

        self.generic_visit(node)

    def _qualified_name(self, name: str) -> str:
        """Build module[.class].name for the current context."""
        parts = [self.module_name]
        if self.class_stack:
            parts.append(self.class_stack[-1])
        parts.append(name)
        return '.'.join(parts)

    def _resolve_call(self, node: ast.AST) -> Optional[str]:
        """Resolve a call expression to a dotted name, or None if unknown."""
        if isinstance(node, ast.Name):
            # Simple function call: prefer the import table.
            if node.id in self.imports:
                return self.imports[node.id]
            return f"{self.module_name}.{node.id}"

        if isinstance(node, ast.Attribute):
            # Method or module.function call: unwind the attribute chain.
            parts = []
            current = node
            while isinstance(current, ast.Attribute):
                parts.append(current.attr)
                current = current.value

            if not isinstance(current, ast.Name):
                # Base is a call/subscript/etc. (e.g. ``foo().bar``): the
                # syntactic resolver cannot name it.  Returning None (rather
                # than joining the still-reversed parts, which fabricated a
                # wrong dotted name) lets the astroid fallback take over.
                return None

            parts.append(current.id)
            parts.reverse()

            root = parts[0]
            # Imported module or name.
            if root in self.imports:
                return f"{self.imports[root]}.{'.'.join(parts[1:])}"

            # Method call on the current class instance.
            if root in ('self', 'cls') and self.class_stack:
                return f"{self.module_name}.{self.class_stack[-1]}.{'.'.join(parts[1:])}"

            return f"{self.module_name}.{'.'.join(parts)}"

        return None

    def _resolve_with_astroid(self, node: ast.Call) -> Optional[str]:
        """Use astroid inference to name the call target, if possible."""
        if not self.astroid_tree:
            return None

        try:
            # Locate the matching astroid node by position.  Linear scan is
            # slow but robust against AST/astroid structural differences.
            for astroid_node in self.astroid_tree.nodes_of_class(astroid.Call):
                if astroid_node.lineno == node.lineno and astroid_node.col_offset == node.col_offset:
                    inferred = astroid_node.func.infer()
                    for target in inferred:
                        if hasattr(target, 'qname'):
                            return target.qname()
                    break
        except Exception:
            # Inference can fail in many ways; this is best-effort only.
            pass
        return None

    def _expr_to_str(self, node: ast.AST) -> str:
        """Render an AST expression as source text ("" for a missing node)."""
        if node is None:
            return ""
        try:
            return ast.unparse(node) if hasattr(ast, 'unparse') else str(node)
        except Exception:
            return str(node)
|
|
code2flow/analysis/cfg.py
ADDED
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Control Flow Graph (CFG) extractor using AST."""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from ..core.config import Config
|
|
8
|
+
from ..core.models import AnalysisResult, FlowNode, FlowEdge, FunctionInfo
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CFGExtractor(ast.NodeVisitor):
    """Extract a Control Flow Graph (CFG) from a Python AST.

    Every significant statement becomes a `FlowNode`; `FlowEdge`s stored in
    ``result.cfg_edges`` connect them in execution order.  Conditionals fan
    out from an IF node and re-join at a MERGE node, loops carry a back edge
    to their header plus an EXIT_LOOP node, and try/except routes an
    "exception" edge from the TRY node to each handler.
    """

    def __init__(self, config: "Config"):
        self.config = config
        self.result = AnalysisResult()
        self.module_name = ""
        self.file_path = ""
        self.node_counter = 0  # next FlowNode id to hand out

        # Context tracking.
        self.function_stack = []  # qualified names of enclosing functions
        self.class_stack = []     # names of enclosing classes
        self.current_node = None  # id of the node control currently sits on
        self.entry_nodes = {}     # function qualified name -> entry node id

    def extract(self, tree: ast.AST, module_name: str, file_path: str) -> "AnalysisResult":
        """Extract the CFG from *tree* and return a fresh AnalysisResult."""
        self.result = AnalysisResult()
        self.module_name = module_name
        self.file_path = file_path
        self.node_counter = 0

        self.visit(tree)
        return self.result

    def new_node(self, node_type: str, label: str, **kwargs) -> int:
        """Create a new flow node and return its id."""
        node_id = self.node_counter
        self.node_counter += 1

        node = FlowNode(
            id=node_id,
            type=node_type,
            label=label,
            function=self.function_stack[-1] if self.function_stack else None,
            file=self.file_path,
            line=kwargs.get('line'),
            column=kwargs.get('column'),
            conditions=kwargs.get('conditions', []),
            data_flow=kwargs.get('data_flow', [])
        )

        self.result.nodes[node_id] = node
        return node_id

    def connect(self, source: Optional[int], target: Optional[int],
                edge_type: str = "control", condition: Optional[str] = None):
        """Create an edge between two nodes; no-op if either end is None."""
        if source is not None and target is not None:
            edge = FlowEdge(
                source=source,
                target=target,
                edge_type=edge_type,
                condition=condition
            )
            self.result.cfg_edges.append(edge)

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Visit a function definition: entry node, body, exit node."""
        func_name = self._qualified_name(node.name)
        self.function_stack.append(func_name)

        # Entry node for the function.
        entry = self.new_node("FUNC", f"FUNC:{func_name}", line=node.lineno)
        self.entry_nodes[func_name] = entry

        # Remember where the enclosing flow sat so it can be restored.
        prev_node = self.current_node
        self.current_node = entry

        # Record function metadata.
        func_info = FunctionInfo(
            name=node.name,
            qualified_name=func_name,
            file=self.file_path,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            args=[arg.arg for arg in node.args.args]
        )
        self.result.functions[func_name] = func_info

        for stmt in node.body:
            self.visit(stmt)

        # Implicit exit node at the end of the body.
        exit_node = self.new_node("RETURN", f"RETURN:{func_name}",
                                  line=node.end_lineno or node.lineno)
        self.connect(self.current_node, exit_node)

        # Restore the enclosing context.
        self.function_stack.pop()
        self.current_node = prev_node

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Visit an async function (identical to sync for CFG purposes)."""
        self.visit_FunctionDef(node)

    def visit_If(self, node: ast.If):
        """Visit an if statement: condition node, branches, merge node."""
        condition = self._extract_condition(node.test)
        cond_node = self.new_node("IF", condition, line=node.lineno)
        self.connect(self.current_node, cond_node, condition=condition)

        # Then branch: statements chain off the condition node.
        self.current_node = cond_node
        for stmt in node.body:
            self.visit(stmt)
        branch_exits = [self.current_node]

        if node.orelse:
            # Else branch starts back at the condition node.
            self.current_node = cond_node
            for stmt in node.orelse:
                self.visit(stmt)
            branch_exits.append(self.current_node)
        else:
            # No else: the false path falls straight through to the merge.
            branch_exits.append(cond_node)

        # Join only the *final* node of each branch at the merge point;
        # connecting every intermediate statement would add bogus edges.
        merge_node = self.new_node("MERGE", "merge", line=node.end_lineno)
        for exit_id in branch_exits:
            self.connect(exit_id, merge_node)

        self.current_node = merge_node

    def visit_For(self, node: ast.For):
        """Visit a for loop: header, chained body, back edge, exit node."""
        iter_str = self._expr_to_str(node.iter)
        target_str = self._expr_to_str(node.target)
        loop_header = self.new_node("FOR", f"for {target_str} in {iter_str}",
                                    line=node.lineno)
        self.connect(self.current_node, loop_header)

        # Body statements chain off the header.
        self.current_node = loop_header
        for stmt in node.body:
            self.visit(stmt)

        # Single back edge from the last body node to the header (one per
        # intermediate statement would fabricate extra loop edges).
        self.connect(self.current_node, loop_header, edge_type="loop")

        # Fall-through path once the iterable is exhausted.
        exit_node = self.new_node("EXIT_LOOP", "exit_loop", line=node.end_lineno)
        self.connect(loop_header, exit_node)
        self.current_node = exit_node

    def visit_While(self, node: ast.While):
        """Visit a while loop: condition header, body, back edge, exit."""
        condition = self._extract_condition(node.test)
        loop_header = self.new_node("WHILE", f"while {condition}", line=node.lineno)
        self.connect(self.current_node, loop_header)

        self.current_node = loop_header
        for stmt in node.body:
            self.visit(stmt)

        # Single back edge from the last body node to the header.
        self.connect(self.current_node, loop_header, edge_type="loop")

        # Exit when the condition evaluates false.
        exit_node = self.new_node("EXIT_LOOP", "exit_loop", line=node.end_lineno)
        self.connect(loop_header, exit_node, condition="False")
        self.current_node = exit_node

    def visit_Try(self, node: ast.Try):
        """Visit a try statement: body, exception edges to handlers, merge."""
        try_entry = self.new_node("TRY", "try", line=node.lineno)
        self.connect(self.current_node, try_entry)

        # Try body.
        self.current_node = try_entry
        for stmt in node.body:
            self.visit(stmt)
        try_exit = self.current_node

        # Except handlers: each reachable via an "exception" edge.
        handler_exits = []
        for handler in node.handlers:
            handler_node = self.new_node("EXCEPT", self._format_except(handler),
                                         line=handler.lineno)
            self.connect(try_entry, handler_node, edge_type="exception")

            self.current_node = handler_node
            for stmt in handler.body:
                self.visit(stmt)
            handler_exits.append(self.current_node)

        # Merge: the normal path AND every handler path re-join here
        # (previously only the try body's exit was connected, leaving
        # handler flow dangling).
        merge = self.new_node("MERGE", "merge", line=node.end_lineno)
        self.connect(try_exit, merge)
        for handler_exit in handler_exits:
            self.connect(handler_exit, merge)
        self.current_node = merge

    def visit_Assign(self, node: ast.Assign):
        """Visit an assignment statement."""
        targets = [self._expr_to_str(t) for t in node.targets]
        value = self._expr_to_str(node.value)
        # Truncate long right-hand sides to keep labels readable.
        label = f"{' = '.join(targets)} = {value[:50]}"

        assign_node = self.new_node("ASSIGN", label, line=node.lineno)
        self.connect(self.current_node, assign_node)
        self.current_node = assign_node

    def visit_Return(self, node: ast.Return):
        """Visit a return statement."""
        value = self._expr_to_str(node.value) if node.value else "None"
        return_node = self.new_node("RETURN", f"return {value[:50]}", line=node.lineno)
        self.connect(self.current_node, return_node)
        self.current_node = return_node

    def visit_Expr(self, node: ast.Expr):
        """Visit an expression statement (significant mainly for calls)."""
        if isinstance(node.value, ast.Call):
            call_name = self._expr_to_str(node.value.func)
            args = [self._expr_to_str(a) for a in node.value.args]
            label = f"CALL {call_name}({', '.join(args)})"[:80]

            call_node = self.new_node("CALL", label, line=node.lineno)
            self.connect(self.current_node, call_node)
            self.current_node = call_node

            # FunctionInfo.calls is a list (CallGraphExtractor appends to
            # it), so append here too -- ``set.add`` would raise
            # AttributeError at runtime.
            if self.function_stack:
                func_name = self.function_stack[-1]
                if func_name in self.result.functions:
                    self.result.functions[func_name].calls.append(call_name)
        else:
            self.generic_visit(node)

    def _qualified_name(self, name: str) -> str:
        """Build module[.class].name for the current context."""
        parts = [self.module_name]
        if self.class_stack:
            parts.append(self.class_stack[-1])
        parts.append(name)
        return '.'.join(parts)

    def _extract_condition(self, node: ast.AST) -> str:
        """Render a condition expression as source text (truncated fallback)."""
        try:
            return ast.unparse(node) if hasattr(ast, 'unparse') else str(node)[:50]
        except Exception:
            return str(node)[:50]

    def _expr_to_str(self, node: ast.AST) -> str:
        """Render an expression as source text; "None" for a missing node."""
        if node is None:
            return "None"
        try:
            return ast.unparse(node) if hasattr(ast, 'unparse') else str(node)
        except Exception:
            return str(node)

    def _format_except(self, handler: ast.ExceptHandler) -> str:
        """Format an except clause header, e.g. "except ValueError as e"."""
        if handler.type:
            type_str = self._expr_to_str(handler.type)
            if handler.name:
                return f"except {type_str} as {handler.name}"
            return f"except {type_str}"
        return "except"
|
|
code2flow/analysis/coupling.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Analysis of coupling between modules."""
|
|
2
|
+
from typing import Dict, List, Set, Any
|
|
3
|
+
from ..core.models import AnalysisResult
|
|
4
|
+
|
|
5
|
+
class CouplingAnalyzer:
    """Analyze coupling between modules of an analyzed project.

    Operates purely on a populated ``AnalysisResult``: the function call
    table drives module-interaction detection, and the recorded mutations
    drive data-leakage and shared-state detection.
    """

    def __init__(self, result: "AnalysisResult"):
        self.result = result

    def analyze(self) -> Dict[str, Any]:
        """Run all coupling analyses and store them on the result.

        Returns a dict with keys "module_interactions", "data_leakage" and
        "shared_state"; the same dict is assigned to ``result.coupling``.
        """
        coupling_data = {
            "module_interactions": self._analyze_module_interactions(),
            "data_leakage": self._detect_data_leakage(),
            "shared_state": self._detect_shared_state(),
        }
        self.result.coupling = coupling_data
        return coupling_data

    def _analyze_module_interactions(self) -> Dict[str, List[str]]:
        """Map each module to the other modules its functions call.

        The annotation is Dict[str, List[str]] -- the collected sets are
        converted to lists for JSON serialization before returning (the
        original annotation claimed Set values, which was wrong).
        """
        interactions: Dict[str, Set[str]] = {}
        for func_name, func_info in self.result.functions.items():
            caller_mod = func_info.module or func_name.split('.')[0]
            callees = interactions.setdefault(caller_mod, set())

            for callee in func_info.calls:
                callee_mod = callee.split('.')[0]
                if callee_mod != caller_mod:
                    callees.add(callee_mod)

        # Convert sets to lists for JSON serialization.
        return {k: list(v) for k, v in interactions.items()}

    def _detect_data_leakage(self) -> List[Dict[str, Any]]:
        """Detect a module mutating data that originates in another module.

        Heuristic: a mutation whose variable is dotted (e.g.
        ``other_mod.data``) and whose root differs from the mutating
        module -- provided the root actually names a known module.
        """
        leakages = []
        for mutation in self.result.mutations:
            mut_mod = mutation.scope.split('.')[0]

            if '.' in mutation.variable:
                origin_mod = mutation.variable.split('.')[0]
                if origin_mod != mut_mod and origin_mod in self.result.modules:
                    leakages.append({
                        "variable": mutation.variable,
                        "mutator_module": mut_mod,
                        "origin_module": origin_mod,
                        "line": mutation.line,
                        "file": mutation.file,
                    })
        return leakages

    def _detect_shared_state(self) -> List[Dict[str, Any]]:
        """Detect variables mutated from more than one module."""
        variable_accessors: Dict[str, Set[str]] = {}  # variable -> modules
        for mutation in self.result.mutations:
            mut_mod = mutation.scope.split('.')[0]
            variable_accessors.setdefault(mutation.variable, set()).add(mut_mod)

        return [
            {"variable": var, "modules": list(mods)}
            for var, mods in variable_accessors.items()
            if len(mods) > 1
        ]
|