PyPI - pyrefactor - Versions diffs - 1.0.1__py3-none-any.whl - Mend

pyrefactor 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

pyrefactor/__init__.py +3 -0
pyrefactor/__main__.py +231 -0
pyrefactor/analyzer.py +185 -0
pyrefactor/ast_visitor.py +197 -0
pyrefactor/config.py +224 -0
pyrefactor/detectors/__init__.py +23 -0
pyrefactor/detectors/boolean_logic.py +231 -0
pyrefactor/detectors/comparisons.py +353 -0
pyrefactor/detectors/complexity.py +248 -0
pyrefactor/detectors/context_manager.py +188 -0
pyrefactor/detectors/control_flow.py +156 -0
pyrefactor/detectors/dict_operations.py +346 -0
pyrefactor/detectors/duplication.py +358 -0
pyrefactor/detectors/loops.py +267 -0
pyrefactor/detectors/performance.py +267 -0
pyrefactor/models.py +98 -0
pyrefactor/py.typed +0 -0
pyrefactor/reporter.py +208 -0
pyrefactor-1.0.1.dist-info/METADATA +353 -0
pyrefactor-1.0.1.dist-info/RECORD +24 -0
pyrefactor-1.0.1.dist-info/WHEEL +5 -0
pyrefactor-1.0.1.dist-info/entry_points.txt +2 -0
pyrefactor-1.0.1.dist-info/licenses/LICENSE.md +70 -0
pyrefactor-1.0.1.dist-info/top_level.txt +1 -0

pyrefactor/detectors/context_manager.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""Context manager detector for PyRefactor."""
+import ast
+from typing import Optional, Union, cast
+from ..ast_visitor import BaseDetector
+from ..config import Config
+from ..models import Issue, Severity
+# Names of callables whose result should be managed by a 'with' statement.
+# These are matched against bare-name calls only (e.g. open(...)).
+CONTEXT_MANAGER_FUNCS = frozenset(
+    {
+        "open",
+        "file",
+        "urlopen",
+        "NamedTemporaryFile",
+        "SpooledTemporaryFile",
+        "TemporaryDirectory",
+        "TemporaryFile",
+        "ZipFile",
+        "PyZipFile",
+        "TarFile",
+        "Popen",
+        "Pool",
+    }
+)
+# Method names that are treated as returning context managers.
+# These are matched against attribute calls only (e.g. path.open(), lock.acquire()).
+CONTEXT_MANAGER_METHODS = frozenset({"open", "acquire", "start"})
+class ContextManagerDetector(BaseDetector):
+    """Detects resource-allocating operations that should use 'with' statements."""
+    def __init__(self, config: Config, file_path: str, source_lines: list[str]) -> None:
+        """Initialize context manager detector."""
+        super().__init__(config, file_path, source_lines)
+        self.resource_assignments: dict[str, Union[ast.Assign, ast.AnnAssign]] = {}
+        self.used_in_with: set[str] = set()
+        self.parent_map: dict[ast.AST, ast.AST] = {}
+    def analyze(self, tree: ast.AST) -> list[Issue]:
+        """Run the detector on an AST and return issues found."""
+        # Build parent map once for the entire tree
+        self._build_parent_map(tree)
+        self.visit(tree)
+        return self.issues
+    def _build_parent_map(self, tree: ast.AST) -> None:
+        """Build a map of child -> parent for the entire tree."""
+        for parent in ast.walk(tree):
+            for child in ast.iter_child_nodes(parent):
+                self.parent_map[child] = parent
+    def get_detector_name(self) -> str:
+        """Return the name of this detector."""
+        return "context_manager"
+    def _create_issue(
+        self,
+        node: ast.AST,
+        *,
+        severity: Severity,
+        rule_id: str,
+        message: str,
+        suggestion: str,
+    ) -> Issue:
+        """Create an Issue object for context manager issues."""
+        return Issue(
+            file=self.file_path,
+            line=cast(int, getattr(node, "lineno", 0)),
+            column=cast(int, getattr(node, "col_offset", 0)),
+            severity=severity,
+            rule_id=rule_id,
+            message=message,
+            suggestion=suggestion,
+        )
+    def _is_context_manager_call(self, node: ast.Call) -> bool:
+        """Check if a call returns a context manager."""
+        # Check for direct function calls (e.g., open(), file())
+        if isinstance(node.func, ast.Name):
+            return node.func.id in CONTEXT_MANAGER_FUNCS
+        # Check for method calls (e.g., lock.acquire(), Path.open())
+        if isinstance(node.func, ast.Attribute):
+            return node.func.attr in CONTEXT_MANAGER_METHODS
+        return False
+    def _is_used_in_return(self, node: ast.Call) -> bool:
+        """Check if the call is part of a return statement."""
+        current = self.parent_map.get(node)
+        while current:
+            if isinstance(current, ast.Return):
+                return True
+            # Stop at function boundaries
+            if isinstance(current, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                return False
+            current = self.parent_map.get(current)
+        return False
+    def _is_used_in_with_context(self, node: ast.Call) -> bool:
+        """Check if the call is already used in a 'with' statement."""
+        current = self.parent_map.get(node)
+        while current:
+            if isinstance(current, ast.With):
+                return True
+            # Stop at function boundaries
+            if isinstance(current, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                return False
+            current = self.parent_map.get(current)
+        return False
+    def visit_Assign(self, node: ast.Assign) -> None:
+        """Check for resource-allocating assignments."""
+        if self.is_suppressed(node):
+            self.generic_visit(node)
+            return
+        # Check if the value is a context manager call
+        if isinstance(node.value, ast.Call) and self._is_context_manager_call(
+            node.value
+        ):
+            self._check_and_report_context_manager(node, node.value)
+        self.generic_visit(node)
+    def visit_Expr(self, node: ast.Expr) -> None:
+        """Check for context manager calls used as statements without assignment."""
+        if self.is_suppressed(node):
+            self.generic_visit(node)
+            return
+        # Check if the expression contains a context manager call (could be chained)
+        cm_call = self._find_context_manager_call(node.value)
+        if cm_call:
+            self._check_and_report_context_manager(node, cm_call)
+        self.generic_visit(node)
+    def _check_and_report_context_manager(
+        self, node: ast.AST, cm_call: ast.Call
+    ) -> None:
+        """Check and report if a context manager call should use 'with' statement."""
+        # Skip if already in a with statement
+        if self._is_used_in_with_context(cm_call):
+            return
+        # Skip if this is in a return statement or being passed
+        if self._is_used_in_return(cm_call):
+            return
+        # Get the function name for a better error message
+        func_name = self._get_func_name(cm_call)
+        self.add_issue(
+            self._create_issue(
+                node,
+                severity=Severity.HIGH,
+                rule_id="R001",
+                message=f"Resource-allocating operation '{func_name}' should use 'with' statement",
+                suggestion=f"Use 'with {func_name}(...) as resource:' to ensure proper resource cleanup",
+            )
+        )
+    def _find_context_manager_call(self, node: ast.AST) -> Optional[ast.Call]:
+        """Find a context manager call in an expression tree."""
+        if isinstance(node, ast.Call) and self._is_context_manager_call(node):
+            return node
+        # Check nested calls (e.g., open(...).read())
+        if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute):
+            # Check the object being called
+            if isinstance(node.func.value, ast.Call) and self._is_context_manager_call(
+                node.func.value
+            ):
+                return node.func.value
+        return None
+    def _get_func_name(self, call: ast.Call) -> str:
+        """Extract the function name from a call node."""
+        if isinstance(call.func, ast.Name):
+            return call.func.id
+        if isinstance(call.func, ast.Attribute):
+            return call.func.attr
+        return "unknown"

pyrefactor/detectors/control_flow.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""Control flow simplification detector for PyRefactor."""
+import ast
+from typing import cast
+from ..ast_visitor import BaseDetector
+from ..models import Issue, Severity
+class ControlFlowDetector(BaseDetector):
+    """Detects unnecessary else/elif clauses after return/raise/break/continue."""
+    def get_detector_name(self) -> str:
+        """Return the name of this detector."""
+        return "control_flow"
+    def _create_issue(
+        self,
+        node: ast.AST,
+        *,
+        severity: Severity,
+        rule_id: str,
+        message: str,
+        suggestion: str,
+    ) -> Issue:
+        """Create an Issue object for control flow issues."""
+        return Issue(
+            file=self.file_path,
+            line=cast(int, getattr(node, "lineno", 0)),
+            column=cast(int, getattr(node, "col_offset", 0)),
+            severity=severity,
+            rule_id=rule_id,
+            message=message,
+            suggestion=suggestion,
+        )
+    # Map terminator types to rule IDs
+    _TERMINATOR_RULES = {
+        "return": "R002",
+        "raise": "R003",
+        "break": "R004",
+        "continue": "R005",
+    }
+    def visit_If(self, node: ast.If) -> None:
+        """Check for unnecessary else clauses."""
+        if self.is_suppressed(node):
+            self.generic_visit(node)
+            return
+        self._check_unnecessary_else(node)
+        self.generic_visit(node)
+    def _check_unnecessary_else(self, node: ast.If) -> None:
+        """Check if the else clause is unnecessary after a terminating statement."""
+        # Early return if no else clause
+        if not node.orelse:
+            return
+        # Check if if-body always terminates
+        if not self._always_terminates(node.body):
+            return
+        # Determine what kind of termination
+        terminator = self._get_terminator_type(node.body)
+        # Report issue if we have a known terminator
+        if terminator in self._TERMINATOR_RULES:
+            self._report_unnecessary_else(
+                node, self._TERMINATOR_RULES[terminator], terminator
+            )
+    def _always_terminates(self, body: list[ast.stmt]) -> bool:
+        """Check if a code block always terminates (return/raise/break/continue)."""
+        if not body:
+            return False
+        # Check the last statement
+        last_stmt = body[-1]
+        # Direct terminating statements
+        if isinstance(last_stmt, (ast.Return, ast.Raise, ast.Break, ast.Continue)):
+            return True
+        # If statement - check if all branches terminate
+        if isinstance(last_stmt, ast.If):
+            # Must have an else clause to ensure all paths terminate
+            if not last_stmt.orelse:
+                return False
+            # Check if both if and else terminate
+            if_terminates = self._always_terminates(last_stmt.body)
+            else_terminates = self._always_terminates(last_stmt.orelse)
+            return if_terminates and else_terminates
+        # Try statement - all branches must terminate
+        if isinstance(last_stmt, ast.Try):
+            try_terminates = self._always_terminates(last_stmt.body)
+            handlers_terminate = all(
+                self._always_terminates(handler.body) for handler in last_stmt.handlers
+            )
+            # If there's an else clause, it must also terminate
+            else_terminates = (
+                self._always_terminates(last_stmt.orelse) if last_stmt.orelse else True
+            )
+            # Finally doesn't affect termination
+            return try_terminates and handlers_terminate and else_terminates
+        return False
+    def _get_terminator_type(self, body: list[ast.stmt]) -> str:
+        """Get the type of terminator in a code block."""
+        if not body:
+            return ""
+        last_stmt = body[-1]
+        # Map statement types to their string names
+        terminator_map = {
+            ast.Return: "return",
+            ast.Raise: "raise",
+            ast.Break: "break",
+            ast.Continue: "continue",
+        }
+        stmt_type = type(last_stmt)
+        if stmt_type in terminator_map:
+            return terminator_map[stmt_type]
+        # Check nested structures
+        if isinstance(last_stmt, ast.If):
+            # Get terminator from if body (assuming we've already checked it terminates)
+            return self._get_terminator_type(last_stmt.body)
+        return ""
+    def _report_unnecessary_else(
+        self, node: ast.If, rule_id: str, terminator: str
+    ) -> None:
+        """Report an unnecessary else clause."""
+        # Determine if it's an elif or else
+        if len(node.orelse) == 1 and isinstance(node.orelse[0], ast.If):
+            clause_type = "elif"
+        else:
+            clause_type = "else"
+        self.add_issue(
+            self._create_issue(
+                node,
+                severity=Severity.MEDIUM,
+                rule_id=rule_id,
+                message=f"Unnecessary '{clause_type}' after '{terminator}' statement",
+                suggestion=f"Remove '{clause_type}' and unindent its body since the "
+                f"preceding code always executes '{terminator}'",
+            )
+        )

pyrefactor/detectors/dict_operations.py ADDED Viewed

@@ -0,0 +1,346 @@
+"""Dictionary operations detector for PyRefactor."""
+import ast
+from typing import Optional, Tuple, cast
+from ..ast_visitor import BaseDetector
+from ..models import Issue, Severity
+class DictOperationsDetector(BaseDetector):
+    """Detects inefficient or non-idiomatic dictionary operations."""
+    def get_detector_name(self) -> str:
+        """Return the name of this detector."""
+        return "dict_operations"
+    def _create_issue(
+        self,
+        node: ast.AST,
+        *,
+        severity: Severity,
+        rule_id: str,
+        message: str,
+        suggestion: str,
+    ) -> Issue:
+        """Create an Issue object for dictionary operation issues."""
+        return Issue(
+            file=self.file_path,
+            line=cast(int, getattr(node, "lineno", 0)),
+            column=cast(int, getattr(node, "col_offset", 0)),
+            severity=severity,
+            rule_id=rule_id,
+            message=message,
+            suggestion=suggestion,
+        )
+    def visit_If(self, node: ast.If) -> None:
+        """Check for dict.get() opportunities."""
+        if self.is_suppressed(node):
+            self.generic_visit(node)
+            return
+        # Pattern: if key in dict: x = dict[key] else: x = default
+        self._check_dict_get_pattern(node)
+        self.generic_visit(node)
+    def _check_dict_get_pattern(self, node: ast.If) -> None:
+        """Check for pattern that could use dict.get()."""
+        # Validate basic structure
+        if not self._is_valid_dict_get_structure(node):
+            return
+        # Extract and validate components
+        components = self._extract_dict_get_components(node)
+        if not components:
+            return
+        var_name, key_name, dict_name, default_val = components
+        self.add_issue(
+            self._create_issue(
+                node,
+                severity=Severity.LOW,
+                rule_id="R006",
+                message="Consider using dict.get() instead of if/else for key lookup",
+                suggestion=f"Use '{var_name} = {dict_name}.get({key_name}, {default_val})' "
+                f"instead of if/else block",
+            )
+        )
+    def _is_valid_dict_get_structure(self, node: ast.If) -> bool:
+        """Check if node has the basic structure for dict.get() refactoring."""
+        # Check if condition is "key in dict"
+        if not isinstance(node.test, ast.Compare):
+            return False
+        if len(node.test.ops) != 1 or not isinstance(node.test.ops[0], ast.In):
+            return False
+        # Must have both if and else branches with single assignments
+        if not node.orelse or len(node.body) != 1 or len(node.orelse) != 1:
+            return False
+        if_stmt = node.body[0]
+        else_stmt = node.orelse[0]
+        return isinstance(if_stmt, ast.Assign) and isinstance(else_stmt, ast.Assign)
+    def _extract_dict_get_components(
+        self, node: ast.If
+    ) -> Optional[Tuple[str, str, str, str]]:
+        """Extract variable names and values for dict.get() suggestion."""
+        if_stmt = node.body[0]
+        else_stmt = node.orelse[0]
+        # Validate assignments structure
+        if not self._validate_assignment_structure(if_stmt, else_stmt):
+            return None
+        # Cast to Assign after validation
+        if_assign = cast(ast.Assign, if_stmt)
+        else_assign = cast(ast.Assign, else_stmt)
+        # Extract and validate condition components
+        condition_data = self._extract_condition_data(node.test)
+        if not condition_data:
+            return None
+        key_name, dict_name = condition_data
+        # Verify if_stmt accesses dict[key]
+        if not self._verify_dict_key_access(if_assign, dict_name, key_name):
+            return None
+        var_name = cast(ast.Name, if_assign.targets[0]).id
+        default_val = (
+            ast.unparse(else_assign.value) if hasattr(ast, "unparse") else "..."
+        )
+        return (var_name, key_name.id, dict_name.id, default_val)
+    def _validate_assignment_structure(
+        self, if_stmt: ast.stmt, else_stmt: ast.stmt
+    ) -> bool:
+        """Validate that both if and else branches have valid assignment structure."""
+        # Check both are assignments
+        if not (isinstance(if_stmt, ast.Assign) and isinstance(else_stmt, ast.Assign)):
+            return False
+        # Check both have exactly one target
+        if len(if_stmt.targets) != 1 or len(else_stmt.targets) != 1:
+            return False
+        # Check both targets are simple names
+        if not (
+            isinstance(if_stmt.targets[0], ast.Name)
+            and isinstance(else_stmt.targets[0], ast.Name)
+        ):
+            return False
+        # Both should assign to the same variable
+        if if_stmt.targets[0].id != else_stmt.targets[0].id:
+            return False
+        # Validate if-body is dict[key] access
+        return isinstance(if_stmt.value, ast.Subscript)
+    def _extract_condition_data(
+        self, test: ast.expr
+    ) -> Optional[Tuple[ast.Name, ast.Name]]:
+        """Extract key and dict names from the condition."""
+        if not isinstance(test, ast.Compare):
+            return None
+        key_name = test.left
+        if not test.comparators:
+            return None
+        dict_name = test.comparators[0]
+        if not isinstance(key_name, ast.Name) or not isinstance(dict_name, ast.Name):
+            return None
+        return (key_name, dict_name)
+    def _verify_dict_key_access(
+        self, if_stmt: ast.Assign, dict_name: ast.Name, key_name: ast.Name
+    ) -> bool:
+        """Verify that if_stmt accesses dict[key] correctly."""
+        # Check if value is a subscript
+        if not isinstance(if_stmt.value, ast.Subscript):
+            return False
+        # Check if the subscript is on the correct dict
+        if not isinstance(if_stmt.value.value, ast.Name):
+            return False
+        if if_stmt.value.value.id != dict_name.id:
+            return False
+        # Check if the slice uses the correct key
+        if not isinstance(if_stmt.value.slice, ast.Name):
+            return False
+        if if_stmt.value.slice.id != key_name.id:
+            return False
+        return True
+    def visit_For(self, node: ast.For) -> None:
+        """Check for dictionary iteration improvements."""
+        if self.is_suppressed(node):
+            self.generic_visit(node)
+            return
+        # Check for .keys() that should be removed
+        self._check_unnecessary_keys(node)
+        # Check for .items() opportunity
+        self._check_dict_items_opportunity(node)
+        self.generic_visit(node)
+    def _check_unnecessary_keys(self, node: ast.For) -> None:
+        """Check for unnecessary .keys() in for loop."""
+        # Pattern: for key in dict.keys()
+        if not isinstance(node.iter, ast.Call):
+            return
+        if not isinstance(node.iter.func, ast.Attribute):
+            return
+        if node.iter.func.attr != "keys":
+            return
+        dict_name = self._get_name(node.iter.func.value)
+        if not dict_name:
+            return
+        target_name = self._get_target_name(node.target)
+        self.add_issue(
+            self._create_issue(
+                node,
+                severity=Severity.INFO,
+                rule_id="R009",
+                message="Unnecessary .keys() call when iterating dictionary",
+                suggestion=f"Use 'for {target_name} in {dict_name}:' "
+                f"instead of 'for {target_name} in {dict_name}.keys():'",
+            )
+        )
+    def _check_dict_items_opportunity(self, node: ast.For) -> None:
+        """Check if loop iterates keys but also accesses values."""
+        # Pattern: for key in dict: ... dict[key] ...
+        if not isinstance(node.target, ast.Name):
+            return
+        # Get the iterable name
+        iter_name = self._get_name(node.iter)
+        if not iter_name:
+            return
+        key_name = node.target.id
+        # Check if body contains dict[key] accesses
+        if self._has_dict_key_access(node.body, iter_name, key_name):
+            self.add_issue(
+                self._create_issue(
+                    node,
+                    severity=Severity.MEDIUM,
+                    rule_id="R007",
+                    message="Consider using .items() to access both keys and values",
+                    suggestion=f"Use 'for {key_name}, value in {iter_name}.items():' "
+                    f"to avoid repeated dict lookups",
+                )
+            )
+    def _has_dict_key_access(
+        self, body: list[ast.stmt], dict_name: str, key_name: str
+    ) -> bool:
+        """Check if body contains dict[key] access pattern."""
+        for stmt in body:
+            for child in ast.walk(stmt):
+                if self._is_dict_key_subscript(child, dict_name, key_name):
+                    return True
+        return False
+    def _is_dict_key_subscript(
+        self, node: ast.AST, dict_name: str, key_name: str
+    ) -> bool:
+        """Check if node is a dict[key] subscript."""
+        # Check if node is a subscript
+        if not isinstance(node, ast.Subscript):
+            return False
+        # Check if subscript is on the correct dict
+        if not isinstance(node.value, ast.Name):
+            return False
+        if node.value.id != dict_name:
+            return False
+        # Check if slice is the correct key
+        if not isinstance(node.slice, ast.Name):
+            return False
+        if node.slice.id != key_name:
+            return False
+        return True
+    def visit_Call(self, node: ast.Call) -> None:
+        """Check for dict comprehension opportunities."""
+        if self.is_suppressed(node):
+            self.generic_visit(node)
+            return
+        # Pattern: dict([(k, v) for ...]) or dict([...])
+        self._check_dict_comprehension(node)
+        self.generic_visit(node)
+    def _check_dict_comprehension(self, node: ast.Call) -> None:
+        """Check if dict() call can be replaced with dict comprehension."""
+        # Check if it's a dict() call
+        if not isinstance(node.func, ast.Name):
+            return
+        if node.func.id != "dict":
+            return
+        # Check if it has arguments
+        if not node.args:
+            return
+        arg = node.args[0]
+        # Check if it's a list comprehension with tuples
+        if not isinstance(arg, ast.ListComp):
+            return
+        # Check if element is a 2-tuple
+        if not isinstance(arg.elt, ast.Tuple):
+            return
+        if len(arg.elt.elts) != 2:
+            return
+        self.add_issue(
+            self._create_issue(
+                node,
+                severity=Severity.LOW,
+                rule_id="R010",
+                message="Consider using dictionary comprehension instead of dict()",
+                suggestion="Use '{k: v for ...}' instead of 'dict([(k, v) for ...])' "
+                "for better readability and performance",
+            )
+        )
+    def _get_name(self, node: ast.AST) -> Optional[str]:
+        """Extract the name from a node."""
+        if isinstance(node, ast.Name):
+            return node.id
+        if isinstance(node, ast.Attribute):
+            return node.attr
+        return None
+    def _get_target_name(self, node: ast.AST) -> str:
+        """Get the target name from a for loop target."""
+        if isinstance(node, ast.Name):
+            return node.id
+        return "item"