PyPI - diff-code-change-range - Versions diffs - 0.0.1__py3-none-any.whl - Mend

diff-code-change-range 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

diff_code_change_range/__init__.py +17 -0
diff_code_change_range/__main__.py +7 -0
diff_code_change_range/affected_marker.py +173 -0
diff_code_change_range/cli.py +167 -0
diff_code_change_range/diff_parser.py +218 -0
diff_code_change_range/reference/__init__.py +59 -0
diff_code_change_range/reference/analyzer.py +555 -0
diff_code_change_range/reference/code_slicer.py +58 -0
diff_code_change_range/reference/differ.py +80 -0
diff_code_change_range/reference/extractor.py +130 -0
diff_code_change_range/reference/models.py +85 -0
diff_code_change_range/reference/scope_parser.py +79 -0
diff_code_change_range/structure_extractor.py +750 -0
diff_code_change_range/yaml_reporter.py +89 -0
diff_code_change_range-0.0.1.dist-info/METADATA +386 -0
diff_code_change_range-0.0.1.dist-info/RECORD +19 -0
diff_code_change_range-0.0.1.dist-info/WHEEL +5 -0
diff_code_change_range-0.0.1.dist-info/entry_points.txt +2 -0
diff_code_change_range-0.0.1.dist-info/top_level.txt +1 -0

diff_code_change_range/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""diff-code-change-range: Extract affected code structures from git diff output."""
+# NOTE(review): this reports 0.1.0, but the wheel that ships this file is
+# named diff-code-change-range 0.0.1 -- confirm which version is intended.
+__version__ = "0.1.0"
+from .diff_parser import parse_diff, FileChange
+from .structure_extractor import extract_structure, CodeNode
+from .affected_marker import mark_affected_nodes
+from .yaml_reporter import generate_yaml_report
+# Names re-exported from the submodules above as the package-level API.
+__all__ = [
+    "parse_diff",
+    "FileChange",
+    "extract_structure",
+    "CodeNode",
+    "mark_affected_nodes",
+    "generate_yaml_report",
+]

diff_code_change_range/__main__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Entry point for `python -m diff_code_change_range`."""
+import sys
+from .cli import main
+# Only run the CLI when this module is executed directly; the value
+# returned by main() is passed to sys.exit() as the process exit status.
+if __name__ == '__main__':
+    sys.exit(main())

diff_code_change_range/affected_marker.py ADDED Viewed

@@ -0,0 +1,173 @@
+"""Affected node marker module for identifying changed code structures."""
+from typing import Set, Optional, List
+from .diff_parser import FileChange
+from .structure_extractor import CodeNode, NodeType
+def mark_affected_nodes(
+    structure: Optional[CodeNode],
+    file_change: FileChange,
+    is_before: bool
+) -> Optional[CodeNode]:
+    """
+    Reduce a code structure tree to the nodes touched by a file change.
+    Args:
+        structure: Root of the structure tree, or None when nothing was extracted
+        file_change: Change record supplying the changed line sets and add/remove flags
+        is_before: Selects changed_old_lines when True, changed_new_lines when False
+    Returns:
+        A new tree holding only affected nodes, or None when nothing is affected
+    """
+    # Nothing to filter when no structure was produced for this side.
+    if structure is None:
+        return None
+    # A file that is entirely added or removed is affected as a whole.
+    if file_change.is_added or file_change.is_removed:
+        return _mark_all_affected(structure)
+    # Modified file: use the line set that belongs to the requested side.
+    touched_lines = (
+        file_change.changed_old_lines if is_before else file_change.changed_new_lines
+    )
+    if touched_lines:
+        return _filter_affected_nodes(structure, touched_lines)
+    return None
+def _line_range_intersects(node_range: tuple, changed_lines: Set[int]) -> bool:
+    """
+    Check if a node's line range intersects with changed lines.
+    Args:
+        node_range: Tuple of (start_line, end_line) - 1-based, inclusive
+        changed_lines: Set of changed line numbers
+    Returns:
+        True if any line in the range is in changed_lines
+    """
+    start, end = node_range
+    # Scan whichever side is smaller: a file- or class-sized range checked
+    # against a handful of changed lines should not walk every line of the
+    # range, and a tiny range should not walk a large changed-line set.
+    if end - start + 1 <= len(changed_lines):
+        return any(line in changed_lines for line in range(start, end + 1))
+    return any(start <= line <= end for line in changed_lines)
+def _mark_all_affected(node: CodeNode) -> CodeNode:
+    """
+    Copy a structure tree with every node flagged as affected.
+    Args:
+        node: Root of the (sub)tree to copy
+    Returns:
+        A freshly built CodeNode tree with the same names, types and line
+        ranges, where is_affected is True on every node
+    """
+    # Rebuild the children first so the copy is affected all the way down.
+    marked_children = []
+    for child in node.children:
+        marked_children.append(_mark_all_affected(child))
+    return CodeNode(
+        name=node.name,
+        node_type=node.node_type,
+        line_range=node.line_range,
+        children=marked_children,
+        is_affected=True
+    )
+def _filter_affected_nodes(node: CodeNode, changed_lines: Set[int]) -> Optional[CodeNode]:
+    """
+    Keep only the parts of a structure tree that the changed lines touch.
+    METHOD, FUNCTION and MEMBER nodes are treated as leaves: they are kept
+    when their line range contains a changed line, and their children are
+    dropped. Every other node type is a container: it is kept when at least
+    one child is kept, or failing that when its own line range contains a
+    changed line (for example an edit to the declaration line itself).
+    Args:
+        node: Node to examine
+        changed_lines: Set of changed line numbers
+    Returns:
+        A new affected CodeNode, or None when the node is not affected
+    """
+    leaf_types = (NodeType.METHOD, NodeType.FUNCTION, NodeType.MEMBER)
+    if node.node_type in leaf_types:
+        if not _line_range_intersects(node.line_range, changed_lines):
+            return None
+        kept_children = []  # Leaves are emitted without children
+    else:
+        filtered = (
+            _filter_affected_nodes(child, changed_lines) for child in node.children
+        )
+        kept_children = [child for child in filtered if child]
+        # With no affected child, the container survives only if its own
+        # range is hit; the range check is skipped when a child was kept.
+        if not kept_children and not _line_range_intersects(node.line_range, changed_lines):
+            return None
+    return CodeNode(
+        name=node.name,
+        node_type=node.node_type,
+        line_range=node.line_range,
+        children=kept_children,
+        is_affected=True
+    )
+def process_file_change(file_change: FileChange) -> tuple:
+    """
+    Build the affected structures for both sides of a file change.
+    Args:
+        file_change: The file change to process
+    Returns:
+        Tuple of (before_structure, after_structure); each element is the
+        filtered CodeNode tree for that side, or None when the side has no
+        path or no source text, or when nothing on it is affected
+    """
+    from .structure_extractor import extract_structure
+    def _affected_for(path, source, is_before):
+        # A side is only analysed when it has both a path and source text.
+        if not (path and source):
+            return None
+        full_tree = extract_structure(source, path)
+        return mark_affected_nodes(full_tree, file_change, is_before=is_before)
+    before_structure = _affected_for(file_change.source_path, file_change.old_source, True)
+    after_structure = _affected_for(file_change.target_path, file_change.new_source, False)
+    return before_structure, after_structure

diff_code_change_range/cli.py ADDED Viewed

@@ -0,0 +1,167 @@
+"""CLI entry point for diff-code-change-range."""
+import argparse
+import sys
+from typing import Optional
+from .diff_parser import parse_diff, FileChange
+from .structure_extractor import extract_structure, CodeNode
+from .affected_marker import mark_affected_nodes
+from .yaml_reporter import generate_and_write_report
+def create_parser() -> argparse.ArgumentParser:
+    """
+    Create and configure the argument parser.
+    Returns:
+        Parser accepting an optional positional ``diff_file`` (stdin is used
+        when it is omitted) and a ``-v/--version`` flag
+    """
+    parser = argparse.ArgumentParser(
+        prog='diff-code-change-range',
+        # The diff parser also accepts .py files, so the help text names
+        # Python alongside Java and Kotlin.
+        description='Extract affected code structures from git diff output for Java, Kotlin, and Python files'
+    )
+    parser.add_argument(
+        'diff_file',
+        nargs='?',
+        help='Path to diff file (default: read from stdin)'
+    )
+    parser.add_argument(
+        '-v', '--version',
+        action='version',
+        # NOTE(review): hard-coded copy of the package version string; keep
+        # it in sync with the package's __version__.
+        version='%(prog)s 0.1.0'
+    )
+    return parser
+def read_diff_input(diff_file: Optional[str]) -> str:
+    """
+    Return the diff text from a file, or from stdin when no file is given.
+    Args:
+        diff_file: Path to the diff file; a falsy value selects stdin
+    Returns:
+        The full diff text
+    Raises:
+        FileNotFoundError: If the specified file doesn't exist
+        IOError: If there's an error reading the file
+    """
+    # No path supplied: consume everything available on stdin.
+    if not diff_file:
+        return sys.stdin.read()
+    try:
+        with open(diff_file, 'r', encoding='utf-8') as handle:
+            content = handle.read()
+    except FileNotFoundError:
+        # Report, then let the caller decide on the exit status.
+        print(f"Error: File not found: {diff_file}", file=sys.stderr)
+        raise
+    except IOError as e:
+        print(f"Error reading file: {e}", file=sys.stderr)
+        raise
+    return content
+def process_diff(diff_text: str) -> tuple:
+    """
+    Turn diff text into lists of affected structures for each side.
+    Args:
+        diff_text: The unified diff text
+    Returns:
+        Tuple of (before_structures, after_structures); each list holds one
+        entry per file that produced a structure for that side
+    """
+    # Process files in diff order, then split the pairs by side and drop
+    # the sides that yielded nothing.
+    per_file = [_process_single_file(change) for change in parse_diff(diff_text)]
+    before_structures = [old_tree for old_tree, _ in per_file if old_tree]
+    after_structures = [new_tree for _, new_tree in per_file if new_tree]
+    return before_structures, after_structures
+def _process_single_file(file_change: FileChange) -> tuple:
+    """
+    Extract and filter the structures for one file change.
+    Each side is handled independently: a failure while extracting or
+    marking one side is reported on stderr as a warning and leaves that
+    side as None instead of aborting the run.
+    Args:
+        file_change: The file change to process
+    Returns:
+        Tuple of (before_structure, after_structure)
+    """
+    before_structure = None
+    after_structure = None
+    has_before = file_change.source_path and file_change.old_source
+    if has_before:
+        try:
+            old_tree = extract_structure(file_change.old_source, file_change.source_path)
+            before_structure = (
+                mark_affected_nodes(old_tree, file_change, is_before=True) if old_tree else None
+            )
+        except Exception as e:
+            print(f"Warning: Failed to process before version of {file_change.source_path}: {e}",
+                  file=sys.stderr)
+    has_after = file_change.target_path and file_change.new_source
+    if has_after:
+        try:
+            new_tree = extract_structure(file_change.new_source, file_change.target_path)
+            after_structure = (
+                mark_affected_nodes(new_tree, file_change, is_before=False) if new_tree else None
+            )
+        except Exception as e:
+            print(f"Warning: Failed to process after version of {file_change.target_path}: {e}",
+                  file=sys.stderr)
+    return before_structure, after_structure
+def main(args: Optional[list] = None) -> int:
+    """
+    Run the CLI: read the diff, analyse it, and write the report.
+    Args:
+        args: Command line arguments (default: sys.argv[1:])
+    Returns:
+        Exit code: 0 on success, 1 on error, 130 when interrupted
+    """
+    options = create_parser().parse_args(args)
+    try:
+        diff_text = read_diff_input(options.diff_file)
+        if diff_text.strip():
+            before_structures, after_structures = process_diff(diff_text)
+        else:
+            # Blank input still produces a report, just an empty one.
+            before_structures, after_structures = [], []
+        generate_and_write_report(before_structures, after_structures)
+        return 0
+    except OSError:
+        # Covers FileNotFoundError and IOError (an alias of OSError); no
+        # message is printed here, read_diff_input prints its own on failure.
+        return 1
+    except KeyboardInterrupt:
+        print("\nInterrupted", file=sys.stderr)
+        return 130
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+if __name__ == '__main__':
+    sys.exit(main())

diff_code_change_range/diff_parser.py ADDED Viewed

@@ -0,0 +1,218 @@
+"""Diff parser module for extracting file changes from unified diff format."""
+import sys
+from dataclasses import dataclass, field
+from typing import List, Set, Optional
+from unidiff import PatchSet
+@dataclass
+class FileChange:
+    """Represents changes to a single file."""
+    # Path on the old side with any a/ or b/ prefix stripped; None when the
+    # diff names /dev/null as the source.
+    source_path: Optional[str]
+    # Path on the new side, normalised the same way; None when the diff
+    # names /dev/null as the target.
+    target_path: Optional[str]
+    # File text rebuilt from the diff lines of each side, joined with "\n".
+    old_source: str = ""
+    new_source: str = ""
+    # Source line numbers of '-' lines and target line numbers of '+' lines.
+    changed_old_lines: Set[int] = field(default_factory=set)
+    changed_new_lines: Set[int] = field(default_factory=set)
+    # Set by the parser for added files, removed files, and renames.
+    is_added: bool = False
+    is_removed: bool = False
+    is_renamed: bool = False
+def parse_diff(diff_text: str) -> List[FileChange]:
+    """
+    Parse unified diff text into per-file change records.
+    Args:
+        diff_text: Unified diff text from `git diff --full-index -U999999`
+    Returns:
+        List of FileChange objects for .java, .kt, and .py files; empty when
+        the input is blank or cannot be parsed (a warning is printed to
+        stderr in the latter case)
+    """
+    if not (diff_text and diff_text.strip()):
+        return []
+    try:
+        patch_set = PatchSet(diff_text)
+    except Exception as e:
+        print(f"Warning: Failed to parse diff: {e}", file=sys.stderr)
+        return []
+    # Files the per-file processor rejects come back as None and are dropped.
+    candidates = (_process_patched_file(patched_file) for patched_file in patch_set)
+    return [change for change in candidates if change]
+def _strip_prefix(path: str) -> str:
+    """Return path without a leading a/ or b/ git diff prefix, if it has one."""
+    return path[2:] if path.startswith(('a/', 'b/')) else path
+def _process_patched_file(patched_file) -> Optional[FileChange]:
+    """Convert one patched file into a FileChange, or None when it is skipped.
+    Files are skipped when neither side has a usable path, when the path
+    does not end in .java, .kt or .py, or when the file is binary.
+    """
+    def _normalise(raw_path):
+        # Drop the a/ or b/ prefix and map /dev/null to "no path".
+        stripped = _strip_prefix(raw_path)
+        return None if stripped == "/dev/null" else stripped
+    source_path = _normalise(patched_file.source_file)
+    target_path = _normalise(patched_file.target_file)
+    # The target names the file unless it is absent (deleted file).
+    file_path = target_path or source_path
+    if not file_path:
+        return None
+    # Only files with a supported extension are analysed.
+    if not _is_java_or_kotlin(file_path):
+        return None
+    if patched_file.is_binary_file:
+        print(f"Warning: Skipping binary file: {file_path}", file=sys.stderr)
+        return None
+    if patched_file.is_added_file:
+        return _process_added_file(patched_file, source_path, target_path)
+    if patched_file.is_removed_file:
+        return _process_removed_file(patched_file, source_path, target_path)
+    # Everything else is a modification, possibly combined with a rename.
+    return _process_modified_file(
+        patched_file, source_path, target_path, patched_file.is_rename
+    )
+def _is_java_or_kotlin(file_path: str) -> bool:
+    """Return True when file_path ends with .java, .kt, or .py (case-sensitive)."""
+    supported_suffixes = ('.java', '.kt', '.py')
+    return file_path.endswith(supported_suffixes)
+def _process_added_file(patched_file, source_path: Optional[str], target_path: Optional[str]) -> FileChange:
+    """Build the FileChange for a newly added file.
+    The new source is the text of every '+' line in hunk order, and every
+    such line's target line number is recorded as changed.
+    """
+    added_entries = [
+        entry
+        for hunk in patched_file
+        for entry in hunk
+        if entry.line_type == '+'
+    ]
+    return FileChange(
+        source_path=source_path,
+        target_path=target_path,
+        old_source="",
+        new_source='\n'.join(entry.value.rstrip('\n') for entry in added_entries),
+        changed_old_lines=set(),
+        changed_new_lines={entry.target_line_no for entry in added_entries},
+        is_added=True,
+        is_removed=False,
+        is_renamed=False
+    )
+def _process_removed_file(patched_file, source_path: Optional[str], target_path: Optional[str]) -> FileChange:
+    """Build the FileChange for a deleted file.
+    The old source is the text of every '-' line in hunk order, and every
+    such line's source line number is recorded as changed.
+    """
+    removed_entries = [
+        entry
+        for hunk in patched_file
+        for entry in hunk
+        if entry.line_type == '-'
+    ]
+    return FileChange(
+        source_path=source_path,
+        target_path=target_path,
+        old_source='\n'.join(entry.value.rstrip('\n') for entry in removed_entries),
+        new_source="",
+        changed_old_lines={entry.source_line_no for entry in removed_entries},
+        changed_new_lines=set(),
+        is_added=False,
+        is_removed=True,
+        is_renamed=False
+    )
+def _process_modified_file(patched_file, source_path: Optional[str], target_path: Optional[str], is_renamed: bool) -> FileChange:
+    """Rebuild both versions of a modified (possibly renamed) file from its hunks.
+    Each context, removed and added line is written into a per-side line
+    buffer at the slot given by its line number. Slots not covered by any
+    hunk line are padded with empty strings, so complete sources are only
+    recovered from full-context diffs (-U999999). The line numbers of '-'
+    and '+' lines become the changed-line sets.
+    """
+    def _store(buffer, line_no, text):
+        # Pad with blanks up to the slot before line_no, then fill the slot:
+        # overwrite it when it already exists, append it otherwise.
+        shortfall = (line_no - 1) - len(buffer)
+        if shortfall > 0:
+            buffer.extend([''] * shortfall)
+        if line_no - 1 < len(buffer):
+            buffer[line_no - 1] = text
+        else:
+            buffer.append(text)
+    old_buffer, new_buffer = [], []
+    removed_line_nos, added_line_nos = set(), set()
+    for hunk in patched_file:
+        for entry in hunk:
+            marker = entry.line_type
+            if marker not in (' ', '-', '+'):
+                # Any other line type contributes nothing to either side.
+                continue
+            text = entry.value.rstrip('\n')
+            # Context and removed lines belong to the old side.
+            if marker != '+' and entry.source_line_no:
+                _store(old_buffer, entry.source_line_no, text)
+                if marker == '-':
+                    removed_line_nos.add(entry.source_line_no)
+            # Context and added lines belong to the new side.
+            if marker != '-' and entry.target_line_no:
+                _store(new_buffer, entry.target_line_no, text)
+                if marker == '+':
+                    added_line_nos.add(entry.target_line_no)
+    return FileChange(
+        source_path=source_path,
+        target_path=target_path,
+        old_source='\n'.join(old_buffer),
+        new_source='\n'.join(new_buffer),
+        changed_old_lines=removed_line_nos,
+        changed_new_lines=added_line_nos,
+        is_added=False,
+        is_removed=False,
+        is_renamed=is_renamed
+    )

diff_code_change_range/reference/__init__.py ADDED Viewed

@@ -0,0 +1,59 @@
+"""Reference extraction module for analyzing relationships between affected code nodes.
+This module provides functionality to extract reference relationships (method calls,
+field accesses, type references, etc.) between code nodes that are within the
+affected scope of a code change.
+Example usage:
+    from diff_code_change_range.reference import extract_references, AffectedScope
+    before_code = {"File.kt": "source code..."}
+    after_code = {"File.kt": "modified code..."}
+    scope = AffectedScope(before=[...], after=[...])
+    result = extract_references(before_code, after_code, scope)
+    print(result.added_references)    # New references in after version
+    print(result.removed_references)  # References removed in after version
+Supported reference types:
+    - METHOD_CALL: Method or function invocation
+    - FIELD_ACCESS: Field or property access
+    - TYPE_REFERENCE: Type usage (variable declarations, parameters, etc.)
+    - INSTANTIATION: Object creation via constructor
+    - ANNOTATION: Annotation usage
+    - INHERITANCE: Class inheritance (extends)
+    - IMPLEMENTATION: Interface implementation (implements)
+Supported languages:
+    - Kotlin (.kt files)
+    - Java (.java files)
+    - Python (.py files)
+Limitations:
+    - Uses Tree-sitter AST analysis with heuristic matching (not full semantic analysis)
+    - Only detects references where both source and target are in affectedScope
+    - System classes (java.lang.*, kotlin.*, etc.) are filtered out
+    - Method overloading is matched by name + argument count heuristic
+"""
+# Re-export the data models and the extraction entry point so callers can
+# import them from the package root, as in the usage example above.
+from .models import (
+    Reference,
+    ReferenceResult,
+    ReferenceType,
+    AffectedScope,
+    AffectedNode,
+    NodeType,
+    QualifiedNode,
+)
+from .extractor import extract_references
+# Everything imported above is listed here as the package's public API.
+__all__ = [
+    "Reference",
+    "ReferenceResult",
+    "ReferenceType",
+    "AffectedScope",
+    "AffectedNode",
+    "NodeType",
+    "QualifiedNode",
+    "extract_references",
+]