PyPI - diff-code-change-range - Versions diffs - 0.0.1__py3-none-any.whl - Mend

diff-code-change-range 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

diff_code_change_range/__init__.py +17 -0
diff_code_change_range/__main__.py +7 -0
diff_code_change_range/affected_marker.py +173 -0
diff_code_change_range/cli.py +167 -0
diff_code_change_range/diff_parser.py +218 -0
diff_code_change_range/reference/__init__.py +59 -0
diff_code_change_range/reference/analyzer.py +555 -0
diff_code_change_range/reference/code_slicer.py +58 -0
diff_code_change_range/reference/differ.py +80 -0
diff_code_change_range/reference/extractor.py +130 -0
diff_code_change_range/reference/models.py +85 -0
diff_code_change_range/reference/scope_parser.py +79 -0
diff_code_change_range/structure_extractor.py +750 -0
diff_code_change_range/yaml_reporter.py +89 -0
diff_code_change_range-0.0.1.dist-info/METADATA +386 -0
diff_code_change_range-0.0.1.dist-info/RECORD +19 -0
diff_code_change_range-0.0.1.dist-info/WHEEL +5 -0
diff_code_change_range-0.0.1.dist-info/entry_points.txt +2 -0
diff_code_change_range-0.0.1.dist-info/top_level.txt +1 -0

diff_code_change_range/reference/analyzer.py ADDED Viewed

@@ -0,0 +1,555 @@
+"""Reference analyzer using Tree-sitter AST."""
+import sys
+from typing import List, Optional, Dict, Set
+from tree_sitter import Language, Parser, Node
+from .models import Reference, ReferenceType, QualifiedNode
+from .code_slicer import CodeSlicer
+try:
+    import tree_sitter_java as ts_java
+    JAVA_LANGUAGE = Language(ts_java.language())
+    JAVA_AVAILABLE = True
+except ImportError:
+    JAVA_AVAILABLE = False
+    JAVA_LANGUAGE = None
+try:
+    import tree_sitter_kotlin as ts_kotlin
+    KOTLIN_LANGUAGE = Language(ts_kotlin.language())
+    KOTLIN_AVAILABLE = True
+except ImportError:
+    KOTLIN_AVAILABLE = False
+    KOTLIN_LANGUAGE = None
+try:
+    import tree_sitter_python as ts_python
+    PYTHON_LANGUAGE = Language(ts_python.language())
+    PYTHON_AVAILABLE = True
+except ImportError:
+    PYTHON_AVAILABLE = False
+    PYTHON_LANGUAGE = None
+class ReferenceAnalyzer:
+    """Analyzes code to find references to other nodes.
+    An analyzer is bound to one file and to a list of candidate target nodes.
+    `analyze` parses the file with Tree-sitter, picks the AST node covering the
+    source node's line range, walks it and resolves the names it finds against
+    the targets by simple name.
+    """
+    # System classes/packages to filter out
+    SYSTEM_PREFIXES = {
+        'java.', 'javax.', 'kotlin.', 'kotlinx.',
+        'android.', 'androidx.', 'com.android.',
+    }
+    # Simple names that are treated as builtin and never resolved to a target.
+    # Built once at class creation instead of on every `_is_system_type` call.
+    _BUILTIN_NAMES = frozenset({
+        'String', 'Int', 'Long', 'Boolean', 'Double', 'Float', 'Char', 'Byte', 'Short',
+        'Integer', 'Character', 'Void', 'Object', 'Class',
+        'List', 'Map', 'Set', 'Collection', 'Iterable', 'Iterator',
+        'ArrayList', 'HashMap', 'HashSet',
+        'int', 'str', 'bool', 'float', 'list', 'dict', 'set', 'tuple',
+        'Any', 'Unit', 'Nothing', 'Throwable', 'Exception',
+        'true', 'false', 'null', 'println', 'print', 'repeat',
+    })
+    # Node types that hold a plain name.
+    _NAME_TYPES = ('identifier', 'simple_identifier')
+    def __init__(self, target_nodes: List[QualifiedNode], file_path: str):
+        """
+        Initialize analyzer with target nodes to look for.
+        Args:
+            target_nodes: List of nodes that can be referenced
+            file_path: Path of the file being analyzed
+        """
+        self.target_nodes = target_nodes
+        self.file_path = file_path
+        self._build_target_index()
+    def _build_target_index(self):
+        """Build the name -> targets and file -> targets lookup tables."""
+        # Index by simple name
+        self.targets_by_name: Dict[str, List[QualifiedNode]] = {}
+        # Index by file
+        self.targets_by_file: Dict[str, List[QualifiedNode]] = {}
+        for node in self.target_nodes:
+            self.targets_by_name.setdefault(node.name, []).append(node)
+            self.targets_by_file.setdefault(node.file_path, []).append(node)
+    def analyze(
+        self,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> List[Reference]:
+        """
+        Analyze source code and find references to target nodes.
+        Args:
+            source_code: Full source code of the file
+            source_node: The node being analyzed (source of references)
+        Returns:
+            List of references found; empty when no grammar is available for
+            the file, parsing fails, or no AST node covers the line range
+        """
+        language = self._get_language()
+        if not language:
+            return []
+        # Parse the full file, not just the snippet: it gives the parser
+        # complete context and keeps line numbers absolute.
+        parser = Parser(language)
+        try:
+            tree = parser.parse(bytes(source_code, 'utf8'))
+        except Exception as e:
+            print(f"Warning: Failed to parse {self.file_path}: {e}", file=sys.stderr)
+            return []
+        # Find the specific node in the AST based on line range
+        target_node = self._find_node_at_line_range(tree.root_node, source_node.line_range)
+        if target_node is None:
+            return []
+        # Dispatch on file type
+        if self.file_path.endswith('.kt'):
+            return self._analyze_kotlin_node(target_node, source_code, source_node)
+        if self.file_path.endswith('.java'):
+            return self._analyze_java_node(target_node, source_code, source_node)
+        if self.file_path.endswith('.py'):
+            return self._analyze_python_node(target_node, source_code, source_node)
+        return []
+    def _find_node_at_line_range(self, root: Node, line_range: tuple) -> Optional[Node]:
+        """
+        Find the AST node that best matches a 1-based, inclusive line range.
+        The outermost node whose span equals the range exactly wins. Without
+        that stop, a single-line range would resolve to the first token on the
+        line, because every token on that line "covers" the range. If no node
+        matches exactly, the deepest node still covering the range is returned.
+        Returns:
+            The matching node, or None when even `root` does not cover the range
+        """
+        start_line, end_line = line_range
+        def search(node: Node) -> Optional[Node]:
+            node_start = node.start_point[0] + 1  # 0-based to 1-based
+            node_end = node.end_point[0] + 1
+            if node_start > start_line or node_end < end_line:
+                return None
+            if node_start == start_line and node_end == end_line:
+                return node
+            # Prefer a child that still covers the range (more specific match)
+            for child in node.children:
+                found = search(child)
+                if found is not None:
+                    return found
+            return node
+        return search(root)
+    def _get_language(self) -> Optional[Language]:
+        """Get the Tree-sitter language for the file, or None if unsupported/unavailable."""
+        if self.file_path.endswith('.java') and JAVA_AVAILABLE:
+            return JAVA_LANGUAGE
+        if self.file_path.endswith('.kt') and KOTLIN_AVAILABLE:
+            return KOTLIN_LANGUAGE
+        if self.file_path.endswith('.py') and PYTHON_AVAILABLE:
+            return PYTHON_LANGUAGE
+        return None
+    def _analyze_kotlin_node(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> List[Reference]:
+        """Walk a Kotlin AST subtree and collect the references found in it."""
+        references: List[Reference] = []
+        def collect(ref: Optional[Reference]) -> None:
+            if ref is not None:
+                references.append(ref)
+        def walk(current: Node) -> None:
+            node_type = current.type
+            # Method/Function call
+            if node_type == 'call_expression':
+                collect(self._extract_call_reference(current, source_code, source_node, 'kotlin'))
+            # Field/Property access - only identifiers sitting directly in these expressions
+            elif node_type == 'simple_identifier':
+                parent = current.parent
+                if parent is not None and parent.type in ('navigation_expression', 'assignment', 'property_delegate'):
+                    collect(self._extract_identifier_reference(current, source_code, source_node))
+            # Type reference
+            elif node_type in ('type_reference', 'user_type'):
+                collect(self._extract_type_reference(current, source_code, source_node))
+            # Object literal, reported as an instantiation
+            elif node_type == 'object_literal':
+                collect(self._extract_instantiation(current, source_code, source_node))
+            # Annotation
+            elif node_type == 'annotation':
+                collect(self._extract_annotation_reference(current, source_code, source_node))
+            # Recurse into children
+            for child in current.children:
+                walk(child)
+        walk(node)
+        return references
+    def _analyze_java_node(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> List[Reference]:
+        """Walk a Java AST subtree and collect the references found in it."""
+        references: List[Reference] = []
+        def collect(ref: Optional[Reference]) -> None:
+            if ref is not None:
+                references.append(ref)
+        def walk(current: Node) -> None:
+            node_type = current.type
+            parent = current.parent
+            # Method call
+            if node_type == 'method_invocation':
+                collect(self._extract_call_reference(current, source_code, source_node, 'java'))
+            # Field access
+            elif node_type == 'field_access':
+                collect(self._extract_field_access(current, source_code, source_node))
+            # Simple name that might be a field. Member names are reported by
+            # the two handlers above; receivers (`obj` in `obj.x`) are plain
+            # references and are resolved here.
+            elif node_type == 'identifier':
+                if (
+                    parent is not None
+                    and parent.type != 'class_declaration'
+                    and not self._is_field_of_parent(current, 'method_invocation', 'name')
+                    and not self._is_field_of_parent(current, 'field_access', 'field')
+                ):
+                    collect(self._extract_identifier_reference(current, source_code, source_node))
+            # Type reference. The base type of a `generic_type` is itself a
+            # `type_identifier` and is reached by the recursion below, so it
+            # needs no separate branch. `new Foo()` is left to the
+            # instantiation handler to avoid reporting it twice.
+            elif node_type == 'type_identifier':
+                if parent is None or parent.type != 'object_creation_expression':
+                    collect(self._extract_type_reference(current, source_code, source_node))
+            # Constructor call
+            elif node_type == 'object_creation_expression':
+                collect(self._extract_instantiation(current, source_code, source_node))
+            # Annotation
+            elif node_type == 'annotation':
+                collect(self._extract_annotation_reference(current, source_code, source_node))
+            for child in current.children:
+                walk(child)
+        walk(node)
+        return references
+    def _analyze_python_node(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> List[Reference]:
+        """Walk a Python AST subtree and collect the references found in it."""
+        references: List[Reference] = []
+        def collect(ref: Optional[Reference]) -> None:
+            if ref is not None:
+                references.append(ref)
+        def walk(current: Node) -> None:
+            node_type = current.type
+            # Function/Method call
+            if node_type == 'call':
+                collect(self._extract_call_reference(current, source_code, source_node, 'python'))
+            # Attribute access. When the attribute is the callee of a call
+            # (`obj.method(...)`) the call handler already reported the member.
+            elif node_type == 'attribute':
+                if not self._is_field_of_parent(current, 'call', 'function'):
+                    collect(self._extract_attribute_access(current, source_code, source_node))
+            # Simple identifier (might be a variable/field reference). The
+            # member name of `obj.attr` is handled above; the receiver `obj`
+            # is resolved here.
+            elif node_type == 'identifier':
+                parent = current.parent
+                if (
+                    parent is not None
+                    and parent.type not in ('call', 'function_definition', 'class_definition')
+                    and not self._is_field_of_parent(current, 'attribute', 'attribute')
+                ):
+                    collect(self._extract_identifier_reference(current, source_code, source_node))
+            for child in current.children:
+                walk(child)
+        walk(node)
+        return references
+    @staticmethod
+    def _node_text(node: Node, source_code: str) -> str:
+        """
+        Return the source text covered by `node`.
+        Tree-sitter offsets are byte offsets into the UTF-8 buffer that was
+        parsed, so they must not be used to slice the `str` directly: any
+        non-ASCII character before the node would shift the result.
+        """
+        raw = getattr(node, 'text', None)
+        if raw is None:
+            raw = source_code.encode('utf8')[node.start_byte:node.end_byte]
+        return raw.decode('utf8', errors='replace')
+    @staticmethod
+    def _first_child_of_type(node: Node, types: tuple) -> Optional[Node]:
+        """Return the first direct child whose type is in `types`, or None."""
+        return next((child for child in node.children if child.type in types), None)
+    @staticmethod
+    def _is_field_of_parent(node: Node, parent_type: str, field_name: str) -> bool:
+        """Tell whether `node` is the `field_name` child of a parent of type `parent_type`."""
+        parent = node.parent
+        if parent is None or parent.type != parent_type:
+            return False
+        occupant = parent.child_by_field_name(field_name)
+        # Compare spans rather than objects: wrappers are created per access.
+        return (
+            occupant is not None
+            and occupant.start_byte == node.start_byte
+            and occupant.end_byte == node.end_byte
+        )
+    def _reference_for(
+        self,
+        anchor: Node,
+        name_node: Optional[Node],
+        source_code: str,
+        source_node: QualifiedNode,
+        ref_type: ReferenceType,
+        *,
+        skip_system: bool = False,
+        arg_count: int = -1
+    ) -> Optional[Reference]:
+        """
+        Resolve the name held by `name_node` and build a Reference for it.
+        Args:
+            anchor: Node whose start line becomes the reference line (1-based)
+            name_node: Node holding the referenced name; None yields None
+            source_code: Full source code of the file
+            source_node: The node the reference originates from
+            ref_type: Kind of reference to emit
+            skip_system: Drop names recognised by `_is_system_type`
+            arg_count: Forwarded to `_find_matching_target`
+        Returns:
+            The reference, or None when the name is empty, filtered out, or
+            matches no target
+        """
+        if name_node is None:
+            return None
+        name = self._node_text(name_node, source_code)
+        if not name or (skip_system and self._is_system_type(name)):
+            return None
+        target = self._find_matching_target(name, arg_count)
+        if target is None:
+            return None
+        return Reference(
+            source=source_node.qualified_path,
+            target=target.qualified_path,
+            type=ref_type,
+            line=anchor.start_point[0] + 1
+        )
+    def _callee_name_node(self, node: Node, language: str) -> Optional[Node]:
+        """
+        Locate the node holding the called function/method name.
+        Java and Python are resolved through grammar fields so that
+        `obj.method()` yields `method`, not the receiver `obj`. Kotlin, and
+        nodes without those fields, fall back to the first name-like child.
+        """
+        if language == 'java':
+            named = node.child_by_field_name('name')
+            if named is not None:
+                return named
+        elif language == 'python':
+            callee = node.child_by_field_name('function')
+            if callee is not None:
+                if callee.type == 'attribute':
+                    return callee.child_by_field_name('attribute')
+                # Other callees (subscripts, nested calls, ...) carry no simple name
+                return callee if callee.type == 'identifier' else None
+        return self._first_child_of_type(node, self._NAME_TYPES)
+    def _extract_call_reference(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode,
+        language: str
+    ) -> Optional[Reference]:
+        """Extract method/function call reference."""
+        # Count arguments independently of the name lookup; the name comes
+        # first among the children, so stopping at it would always leave 0.
+        arg_list = self._first_child_of_type(node, ('value_arguments', 'argument_list'))
+        arg_count = self._count_arguments(arg_list) if arg_list is not None else 0
+        return self._reference_for(
+            node,
+            self._callee_name_node(node, language),
+            source_code,
+            source_node,
+            ReferenceType.METHOD_CALL,
+            arg_count=arg_count
+        )
+    def _extract_identifier_reference(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> Optional[Reference]:
+        """Extract reference from a simple identifier (reported as a field access)."""
+        return self._reference_for(
+            node, node, source_code, source_node,
+            ReferenceType.FIELD_ACCESS, skip_system=True
+        )
+    def _extract_field_access(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> Optional[Reference]:
+        """Extract field access (Java style): the accessed field, not the receiver."""
+        name_node = node.child_by_field_name('field')
+        if name_node is None or name_node.type != 'identifier':
+            name_node = self._first_child_of_type(node, ('identifier',))
+        return self._reference_for(
+            node, name_node, source_code, source_node, ReferenceType.FIELD_ACCESS
+        )
+    def _extract_attribute_access(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> Optional[Reference]:
+        """Extract attribute access (Python style): the attribute, not the receiver."""
+        name_node = node.child_by_field_name('attribute')
+        if name_node is None:
+            name_node = self._first_child_of_type(node, ('identifier',))
+        return self._reference_for(
+            node, name_node, source_code, source_node, ReferenceType.FIELD_ACCESS
+        )
+    def _extract_type_reference(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> Optional[Reference]:
+        """Extract type reference."""
+        name_types = ('identifier', 'simple_identifier', 'type_identifier')
+        # A bare `type_identifier` is a leaf: the name is the node itself.
+        if node.type in name_types:
+            name_node = node
+        else:
+            name_node = self._first_child_of_type(node, name_types)
+        return self._reference_for(
+            node, name_node, source_code, source_node,
+            ReferenceType.TYPE_REFERENCE, skip_system=True
+        )
+    def _extract_instantiation(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> Optional[Reference]:
+        """Extract object instantiation reference."""
+        name_node = self._first_child_of_type(
+            node, ('identifier', 'simple_identifier', 'type_identifier', 'user_type')
+        )
+        return self._reference_for(
+            node, name_node, source_code, source_node,
+            ReferenceType.INSTANTIATION, skip_system=True
+        )
+    def _extract_annotation_reference(
+        self,
+        node: Node,
+        source_code: str,
+        source_node: QualifiedNode
+    ) -> Optional[Reference]:
+        """Extract annotation reference."""
+        name_node = self._first_child_of_type(
+            node, ('identifier', 'simple_identifier', 'user_type')
+        )
+        return self._reference_for(
+            node, name_node, source_code, source_node, ReferenceType.ANNOTATION
+        )
+    def _find_matching_target(self, name: str, arg_count: int = -1) -> Optional[QualifiedNode]:
+        """
+        Find target node matching the given simple name.
+        With several candidates, those declared in the analyzed file are
+        preferred; the first remaining candidate is returned.
+        Args:
+            name: Simple name to look up
+            arg_count: Number of call arguments, -1 when unknown. Accepted for
+                overload matching but not used to narrow candidates yet.
+        """
+        candidates = self.targets_by_name.get(name)
+        if not candidates:
+            return None
+        if len(candidates) > 1:
+            same_file = [c for c in candidates if c.file_path == self.file_path]
+            if same_file:
+                candidates = same_file
+        return candidates[0]
+    def _count_arguments(self, node: Node) -> int:
+        """
+        Count arguments in an argument list.
+        Only children typed `value_argument`, `argument` or
+        `positional_argument` are counted.
+        NOTE(review): grammars that put bare expressions directly in the
+        argument list would count as 0 here - confirm before relying on it.
+        """
+        return sum(
+            1 for child in node.children
+            if child.type in ('value_argument', 'argument', 'positional_argument')
+        )
+    def _is_system_type(self, type_name: str) -> bool:
+        """Check if a name is a known builtin or starts with a system package prefix."""
+        if type_name in self._BUILTIN_NAMES:
+            return True
+        # str.startswith accepts a tuple: one call covers every prefix
+        return type_name.startswith(tuple(self.SYSTEM_PREFIXES))

diff_code_change_range/reference/code_slicer.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""Code slicer for extracting code snippets by line range."""
+from typing import Optional, Tuple
+class CodeSlicer:
+    """Cuts line-addressed snippets out of a source string."""
+    @staticmethod
+    def extract(source_code: str, line_range: Tuple[int, int]) -> str:
+        """
+        Return the lines of `source_code` inside `line_range`, joined by newlines.
+        Args:
+            source_code: The full source code
+            line_range: Tuple of (start_line, end_line) - 1-based, inclusive
+        Returns:
+            The snippet; "" for empty source or when the clamped range is empty
+        """
+        if not source_code:
+            return ""
+        all_lines = source_code.split('\n')
+        requested_start, requested_end = line_range
+        # Clamp the request to the lines that actually exist
+        first = max(requested_start, 1)
+        last = min(requested_end, len(all_lines))
+        if first > last:
+            return ""
+        # 1-based inclusive range -> 0-based half-open slice
+        return '\n'.join(all_lines[first - 1:last])
+    @staticmethod
+    def get_line_at(source_code: str, line_number: int) -> Optional[str]:
+        """
+        Return one line of `source_code`.
+        Args:
+            source_code: The full source code
+            line_number: 1-based line number
+        Returns:
+            The line content or None if the source is empty or the number is out of bounds
+        """
+        if source_code and line_number >= 1:
+            all_lines = source_code.split('\n')
+            if line_number <= len(all_lines):
+                return all_lines[line_number - 1]
+        return None

diff_code_change_range/reference/differ.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""Reference differ for computing added and removed references."""
+from typing import List, Set
+from .models import Reference, ReferenceResult
+class ReferenceDiffer:
+    """Computes differences between before and after reference sets."""
+    @staticmethod
+    def compute_diff(
+        before_refs: List[Reference],
+        after_refs: List[Reference]
+    ) -> ReferenceResult:
+        """
+        Compute added and removed references.
+        Two references are considered equal if they have the same source,
+        target, and type. Line numbers are ignored. This relies on the
+        equality/hash of Reference, which is defined in `.models`.
+        The added/removed lists are emitted in order of first appearance in
+        their originating list, so the result does not depend on set
+        iteration order and is stable between runs.
+        Args:
+            before_refs: References from before version
+            after_refs: References from after version
+        Returns:
+            ReferenceResult with before, after, added, and removed references
+        """
+        # Sets are used for membership tests only, never for ordering
+        before_set = set(before_refs)
+        after_set = set(after_refs)
+        added = ReferenceDiffer._ordered_difference(after_refs, before_set)
+        removed = ReferenceDiffer._ordered_difference(before_refs, after_set)
+        return ReferenceResult(
+            before_references=before_refs,
+            after_references=after_refs,
+            added_references=ReferenceDiffer._restore_line_numbers(added, after_refs),
+            removed_references=ReferenceDiffer._restore_line_numbers(removed, before_refs)
+        )
+    @staticmethod
+    def _ordered_difference(refs: List[Reference], excluded: Set[Reference]) -> List[Reference]:
+        """Return the distinct items of `refs` not in `excluded`, in first-appearance order."""
+        seen: Set[Reference] = set()
+        result: List[Reference] = []
+        for ref in refs:
+            if ref in excluded or ref in seen:
+                continue
+            seen.add(ref)
+            result.append(ref)
+        return result
+    @staticmethod
+    def _restore_line_numbers(
+        diff_refs: List[Reference],
+        original_refs: List[Reference]
+    ) -> List[Reference]:
+        """
+        Rebuild `diff_refs` with line numbers taken from `original_refs`.
+        De-duplication keeps one arbitrary representative per
+        (source, target, type). The line is looked up again so that it always
+        comes from the last matching entry of `original_refs`. A reference
+        without a match keeps its own line.
+        """
+        # Later entries overwrite earlier ones: the last occurrence wins
+        line_by_key = {
+            (ref.source, ref.target, ref.type): ref.line
+            for ref in original_refs
+        }
+        return [
+            Reference(
+                source=ref.source,
+                target=ref.target,
+                type=ref.type,
+                line=line_by_key.get((ref.source, ref.target, ref.type), ref.line)
+            )
+            for ref in diff_refs
+        ]