diff-code-change-range 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ """diff-code-change-range: Extract affected code structures from git diff output."""
2
+
3
# NOTE(review): the distribution this module ships in is labelled 0.0.1, while
# this constant (and the CLI's --version string) say 0.1.0 — confirm which
# version is intended.
__version__ = "0.1.0"

from .diff_parser import parse_diff, FileChange
from .structure_extractor import extract_structure, CodeNode
from .affected_marker import mark_affected_nodes
from .yaml_reporter import generate_yaml_report

# Names re-exported from the submodules above as the package-level API.
__all__ = [
    "parse_diff",
    "FileChange",
    "extract_structure",
    "CodeNode",
    "mark_affected_nodes",
    "generate_yaml_report",
]
@@ -0,0 +1,7 @@
1
+ """Entry point for `python -m diff_code_change_range`."""
2
+
3
+ import sys
4
+ from .cli import main
5
+
6
# Only run the CLI when this module is executed directly; the integer returned
# by main() is handed to sys.exit() and becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
@@ -0,0 +1,173 @@
1
+ """Affected node marker module for identifying changed code structures."""
2
+
3
+ from typing import Set, Optional, List
4
+ from .diff_parser import FileChange
5
+ from .structure_extractor import CodeNode, NodeType
6
+
7
+
8
def mark_affected_nodes(
    structure: Optional[CodeNode],
    file_change: FileChange,
    is_before: bool
) -> Optional[CodeNode]:
    """
    Reduce a code structure tree to the nodes touched by a file change.

    Args:
        structure: Root of the structure tree, or None when there is no tree
        file_change: Change record supplying the changed line numbers and
            the added/removed flags
        is_before: True selects changed_old_lines, False selects
            changed_new_lines

    Returns:
        A new tree holding only affected nodes, or None when the input tree
        is None or nothing in it is affected
    """
    if structure is None:
        return None

    # A file that was added or removed outright is affected in its entirety,
    # regardless of which line set would otherwise be consulted.
    if file_change.is_added or file_change.is_removed:
        return _mark_all_affected(structure)

    relevant_lines = (
        file_change.changed_old_lines if is_before else file_change.changed_new_lines
    )

    # A modified file with no changed lines on this side has nothing to report.
    if not relevant_lines:
        return None

    return _filter_affected_nodes(structure, relevant_lines)
42
+
43
+
44
+ def _line_range_intersects(node_range: tuple, changed_lines: Set[int]) -> bool:
45
+ """
46
+ Check if a node's line range intersects with changed lines.
47
+
48
+ Args:
49
+ node_range: Tuple of (start_line, end_line) - 1-based, inclusive
50
+ changed_lines: Set of changed line numbers
51
+
52
+ Returns:
53
+ True if any line in the range is in changed_lines
54
+ """
55
+ start, end = node_range
56
+ # Check if any line in [start, end] is in changed_lines
57
+ for line in range(start, end + 1):
58
+ if line in changed_lines:
59
+ return True
60
+ return False
61
+
62
+
63
def _mark_all_affected(node: CodeNode) -> CodeNode:
    """
    Copy a structure tree with every node flagged as affected.

    Args:
        node: Root of the (sub)tree to copy

    Returns:
        A freshly built CodeNode mirroring `node`, with is_affected=True on
        it and on every descendant
    """
    # Recurse first so the copy is assembled bottom-up.
    marked_children = list(map(_mark_all_affected, node.children))
    return CodeNode(
        name=node.name,
        node_type=node.node_type,
        line_range=node.line_range,
        children=marked_children,
        is_affected=True
    )
81
+
82
+
83
def _filter_affected_nodes(node: CodeNode, changed_lines: Set[int]) -> Optional[CodeNode]:
    """
    Prune a structure tree down to the nodes hit by changed lines.

    METHOD, FUNCTION and MEMBER nodes are treated as leaves: they are kept
    when their own line range intersects the changed lines, and their
    children are dropped from the copy.

    Every other node type is treated as a container: it is kept when at
    least one child survives filtering, or - failing that - when its own
    line range intersects the changed lines (for example when only the
    declaration line of a class was edited).

    Args:
        node: Current node to process
        changed_lines: Set of changed line numbers

    Returns:
        A new CodeNode flagged as affected, or None if the node is not affected
    """
    def _affected_copy(children: list) -> CodeNode:
        # All kept nodes are rebuilt the same way; only the child list differs.
        return CodeNode(
            name=node.name,
            node_type=node.node_type,
            line_range=node.line_range,
            children=children,
            is_affected=True
        )

    if node.node_type in (NodeType.METHOD, NodeType.FUNCTION, NodeType.MEMBER):
        if not _line_range_intersects(node.line_range, changed_lines):
            return None
        return _affected_copy([])

    filtered = (_filter_affected_nodes(child, changed_lines) for child in node.children)
    surviving_children = [child for child in filtered if child]

    # The range check only runs when no child survived (short-circuit `or`),
    # in which case the copy is returned with an empty child list.
    if surviving_children or _line_range_intersects(node.line_range, changed_lines):
        return _affected_copy(surviving_children)

    return None
145
+
146
+
147
def process_file_change(file_change: FileChange) -> tuple:
    """
    Build the affected structure trees for both sides of a file change.

    Args:
        file_change: The file change to process

    Returns:
        Tuple of (before_structure, after_structure); each element is the
        filtered CodeNode tree for that side, or None when that side has no
        path, no source text, or no affected nodes
    """
    from .structure_extractor import extract_structure

    def _affected_side(path, source, is_before):
        # A side is only analysed when it has both a path and source text.
        if not (path and source):
            return None
        full_tree = extract_structure(source, path)
        return mark_affected_nodes(full_tree, file_change, is_before=is_before)

    before_structure = _affected_side(
        file_change.source_path, file_change.old_source, True
    )
    after_structure = _affected_side(
        file_change.target_path, file_change.new_source, False
    )
    return before_structure, after_structure
@@ -0,0 +1,167 @@
1
+ """CLI entry point for diff-code-change-range."""
2
+
3
+ import argparse
4
+ import sys
5
+ from typing import Optional
6
+
7
+ from .diff_parser import parse_diff, FileChange
8
+ from .structure_extractor import extract_structure, CodeNode
9
+ from .affected_marker import mark_affected_nodes
10
+ from .yaml_reporter import generate_and_write_report
11
+
12
+
13
def create_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the command line interface.

    The parser accepts one optional positional `diff_file` and a
    `-v/--version` flag.
    """
    cli_parser = argparse.ArgumentParser(
        prog='diff-code-change-range',
        description='Extract affected code structures from git diff output for Java and Kotlin files'
    )

    # Optional positional: when omitted, parse_args leaves diff_file as None.
    cli_parser.add_argument(
        'diff_file',
        nargs='?',
        help='Path to diff file (default: read from stdin)'
    )
    cli_parser.add_argument(
        '-v', '--version',
        action='version',
        version='%(prog)s 0.1.0'
    )
    return cli_parser
33
+
34
+
35
def read_diff_input(diff_file: Optional[str]) -> str:
    """
    Return the diff text from a file, or from stdin when no file is given.

    Args:
        diff_file: Path to diff file; None (or an empty string) means stdin

    Returns:
        The diff text content

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        IOError: If there's an error reading the file
    """
    if not diff_file:
        return sys.stdin.read()

    try:
        with open(diff_file, 'r', encoding='utf-8') as handle:
            content = handle.read()
    except FileNotFoundError:
        # Report on stderr, then let the caller decide the exit status.
        print(f"Error: File not found: {diff_file}", file=sys.stderr)
        raise
    except IOError as e:
        print(f"Error reading file: {e}", file=sys.stderr)
        raise
    return content
61
+
62
+
63
def process_diff(diff_text: str) -> tuple:
    """
    Turn unified diff text into lists of affected structures.

    Args:
        diff_text: The unified diff text

    Returns:
        Tuple of (before_structures, after_structures); each list holds one
        entry per file change that produced a structure for that side
    """
    before_structures, after_structures = [], []

    for change in parse_diff(diff_text):
        old_tree, new_tree = _process_single_file(change)
        # Sides without a structure are simply left out of the lists.
        if old_tree:
            before_structures.append(old_tree)
        if new_tree:
            after_structures.append(new_tree)

    return before_structures, after_structures
86
+
87
+
88
def _process_single_file(file_change: FileChange) -> tuple:
    """
    Extract the affected structures for both sides of one file change.

    Any exception raised while handling a side is reported as a warning on
    stderr and that side is returned as None, so one bad file does not abort
    the whole run.

    Args:
        file_change: The file change to process

    Returns:
        Tuple of (before_structure, after_structure)
    """
    old_tree = None
    new_tree = None

    # Old side: needs both a path and reconstructed source text.
    if file_change.source_path and file_change.old_source:
        try:
            parsed_old = extract_structure(file_change.old_source, file_change.source_path)
            if parsed_old:
                old_tree = mark_affected_nodes(parsed_old, file_change, is_before=True)
        except Exception as e:
            print(f"Warning: Failed to process before version of {file_change.source_path}: {e}",
                  file=sys.stderr)

    # New side: same treatment using the target path and new source.
    if file_change.target_path and file_change.new_source:
        try:
            parsed_new = extract_structure(file_change.new_source, file_change.target_path)
            if parsed_new:
                new_tree = mark_affected_nodes(parsed_new, file_change, is_before=False)
        except Exception as e:
            print(f"Warning: Failed to process after version of {file_change.target_path}: {e}",
                  file=sys.stderr)

    return old_tree, new_tree
122
+
123
+
124
def main(args: Optional[list] = None) -> int:
    """
    Main entry point for the CLI.

    Reads the diff (from the file argument or stdin), extracts the affected
    structures and writes the report via generate_and_write_report.

    Args:
        args: Command line arguments (default: sys.argv[1:])

    Returns:
        Exit code: 0 for success, 1 for error, 130 when interrupted
    """
    parser = create_parser()
    parsed_args = parser.parse_args(args)

    try:
        # Read diff input
        try:
            diff_text = read_diff_input(parsed_args.diff_file)
        except OSError as e:
            # read_diff_input prints its own message for file errors; a
            # failure while reading stdin is not reported there, so do it here
            # instead of exiting silently.
            if not parsed_args.diff_file:
                print(f"Error reading stdin: {e}", file=sys.stderr)
            return 1

        if not diff_text.strip():
            # Empty input - output empty result
            generate_and_write_report([], [])
            return 0

        # Process diff
        before_structures, after_structures = process_diff(diff_text)

        # Generate and output report
        generate_and_write_report(before_structures, after_structures)

        return 0

    except KeyboardInterrupt:
        print("\nInterrupted", file=sys.stderr)
        return 130
    except Exception as e:
        # Also covers OSError raised while writing the report, which used to
        # return 1 without any message.
        print(f"Error: {e}", file=sys.stderr)
        return 1
164
+
165
+
166
# Script entry: run the CLI and use main()'s integer return value as the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())
@@ -0,0 +1,218 @@
1
+ """Diff parser module for extracting file changes from unified diff format."""
2
+
3
+ import sys
4
+ from dataclasses import dataclass, field
5
+ from typing import List, Set, Optional
6
+ from unidiff import PatchSet
7
+
8
+
9
@dataclass
class FileChange:
    """Represents changes to a single file.

    Instances are produced by the parsing helpers in this module, one per
    patched file of a unified diff.
    """
    # Old-side path with a leading a/ or b/ stripped; None when the diff
    # names /dev/null on that side.
    source_path: Optional[str]
    # New-side path, normalised the same way as source_path.
    target_path: Optional[str]
    # Old file text rebuilt from the diff lines and joined with '\n';
    # empty for added files.
    old_source: str = ""
    # New file text rebuilt the same way; empty for removed files.
    new_source: str = ""
    # Old-side line numbers of the '-' lines in the diff.
    changed_old_lines: Set[int] = field(default_factory=set)
    # New-side line numbers of the '+' lines in the diff.
    changed_new_lines: Set[int] = field(default_factory=set)
    # True when the patched file is a newly added file.
    is_added: bool = False
    # True when the patched file is a deleted file.
    is_removed: bool = False
    # True when the patched file is reported as a rename (modified files only).
    is_renamed: bool = False
21
+
22
+
23
def parse_diff(diff_text: str) -> List[FileChange]:
    """
    Convert unified diff text into a list of file changes.

    Args:
        diff_text: Unified diff text from `git diff --full-index -U999999`

    Returns:
        List of FileChange objects for .java, .kt, and .py files; empty when
        the input is blank or cannot be parsed (a warning goes to stderr in
        the latter case)
    """
    if not (diff_text and diff_text.strip()):
        return []

    try:
        patch_set = PatchSet(diff_text)
    except Exception as e:
        print(f"Warning: Failed to parse diff: {e}", file=sys.stderr)
        return []

    # Patched files that are skipped by the per-file handler come back falsy
    # and are dropped here.
    candidates = (_process_patched_file(patched_file) for patched_file in patch_set)
    return [file_change for file_change in candidates if file_change]
50
+
51
+
52
+ def _strip_prefix(path: str) -> str:
53
+ """Strip a/ or b/ prefix from git diff paths."""
54
+ if path.startswith('a/') or path.startswith('b/'):
55
+ return path[2:]
56
+ return path
57
+
58
+
59
def _process_patched_file(patched_file) -> Optional[FileChange]:
    """Build a FileChange from one patched file, or return None to skip it.

    A patched file is skipped when it has no usable path, when its extension
    is not accepted by _is_java_or_kotlin, or when it is flagged as binary
    (which also prints a warning on stderr).
    """
    def _normalise(raw_path):
        # Drop the a/ or b/ prefix; /dev/null marks the missing side of an
        # add or delete and is represented as None.
        stripped = _strip_prefix(raw_path)
        return None if stripped == "/dev/null" else stripped

    source_path = _normalise(patched_file.source_file)
    target_path = _normalise(patched_file.target_file)

    # Prefer the new-side path; fall back to the old side for deletions.
    file_path = target_path or source_path
    if not file_path or not _is_java_or_kotlin(file_path):
        return None

    if patched_file.is_binary_file:
        print(f"Warning: Skipping binary file: {file_path}", file=sys.stderr)
        return None

    if patched_file.is_added_file:
        return _process_added_file(patched_file, source_path, target_path)

    if patched_file.is_removed_file:
        return _process_removed_file(patched_file, source_path, target_path)

    # Everything else is a modification, possibly combined with a rename.
    return _process_modified_file(
        patched_file, source_path, target_path, patched_file.is_rename
    )
98
+
99
+
100
+ def _is_java_or_kotlin(file_path: str) -> bool:
101
+ """Check if file path has .java, .kt, or .py extension."""
102
+ return file_path.endswith('.java') or file_path.endswith('.kt') or file_path.endswith('.py')
103
+
104
+
105
def _process_added_file(patched_file, source_path: Optional[str], target_path: Optional[str]) -> FileChange:
    """Build the FileChange for a newly added file.

    The new source is the text of every '+' line (trailing newlines removed)
    joined with '\\n', and every such line's target line number is recorded
    as changed.
    """
    added = [
        line
        for hunk in patched_file
        for line in hunk
        if line.line_type == '+'
    ]

    return FileChange(
        source_path=source_path,
        target_path=target_path,
        old_source="",
        new_source='\n'.join(line.value.rstrip('\n') for line in added),
        changed_old_lines=set(),
        changed_new_lines={line.target_line_no for line in added},
        is_added=True,
        is_removed=False,
        is_renamed=False
    )
127
+
128
+
129
def _process_removed_file(patched_file, source_path: Optional[str], target_path: Optional[str]) -> FileChange:
    """Build the FileChange for a deleted file.

    The old source is the text of every '-' line (trailing newlines removed)
    joined with '\\n', and every such line's source line number is recorded
    as changed.
    """
    removed = [
        line
        for hunk in patched_file
        for line in hunk
        if line.line_type == '-'
    ]

    return FileChange(
        source_path=source_path,
        target_path=target_path,
        old_source='\n'.join(line.value.rstrip('\n') for line in removed),
        new_source="",
        changed_old_lines={line.source_line_no for line in removed},
        changed_new_lines=set(),
        is_added=False,
        is_removed=True,
        is_renamed=False
    )
151
+
152
+
153
def _process_modified_file(patched_file, source_path: Optional[str], target_path: Optional[str], is_renamed: bool) -> FileChange:
    """Build the FileChange for a modified file (including renames with changes).

    Both file versions are rebuilt from the diff lines, which gives complete
    files when the diff carries full context (-U999999). Context lines go to
    both versions, '-' lines only to the old one and '+' lines only to the
    new one; '-' and '+' lines also record their line numbers as changed.
    """
    def _store_line(buffer: list, line_no: int, content: str) -> None:
        # Put `content` at 1-based position `line_no`: overwrite an existing
        # slot, otherwise pad any gap with '' and append.
        index = line_no - 1
        if index < len(buffer):
            buffer[index] = content
        else:
            buffer.extend([''] * (index - len(buffer)))
            buffer.append(content)

    old_lines = []
    new_lines = []
    changed_old_lines = set()
    changed_new_lines = set()

    for hunk in patched_file:
        for line in hunk:
            kind = line.line_type
            if kind not in (' ', '-', '+'):
                # Any other line type contributes nothing to either version.
                continue

            line_content = line.value.rstrip('\n')

            # Old side: context and deleted lines.
            if kind != '+' and line.source_line_no:
                _store_line(old_lines, line.source_line_no, line_content)
                if kind == '-':
                    changed_old_lines.add(line.source_line_no)

            # New side: context and added lines.
            if kind != '-' and line.target_line_no:
                _store_line(new_lines, line.target_line_no, line_content)
                if kind == '+':
                    changed_new_lines.add(line.target_line_no)

    return FileChange(
        source_path=source_path,
        target_path=target_path,
        old_source='\n'.join(old_lines),
        new_source='\n'.join(new_lines),
        changed_old_lines=changed_old_lines,
        changed_new_lines=changed_new_lines,
        is_added=False,
        is_removed=False,
        is_renamed=is_renamed
    )
@@ -0,0 +1,59 @@
1
+ """Reference extraction module for analyzing relationships between affected code nodes.
2
+
3
+ This module provides functionality to extract reference relationships (method calls,
4
+ field accesses, type references, etc.) between code nodes that are within the
5
+ affected scope of a code change.
6
+
7
+ Example usage:
8
+ from diff_code_change_range.reference import extract_references, AffectedScope
9
+
10
+ before_code = {"File.kt": "source code..."}
11
+ after_code = {"File.kt": "modified code..."}
12
+ scope = AffectedScope(before=[...], after=[...])
13
+
14
+ result = extract_references(before_code, after_code, scope)
15
+ print(result.added_references) # New references in after version
16
+ print(result.removed_references) # References removed in after version
17
+
18
+ Supported reference types:
19
+ - METHOD_CALL: Method or function invocation
20
+ - FIELD_ACCESS: Field or property access
21
+ - TYPE_REFERENCE: Type usage (variable declarations, parameters, etc.)
22
+ - INSTANTIATION: Object creation via constructor
23
+ - ANNOTATION: Annotation usage
24
+ - INHERITANCE: Class inheritance (extends)
25
+ - IMPLEMENTATION: Interface implementation (implements)
26
+
27
+ Supported languages:
28
+ - Kotlin (.kt files)
29
+ - Java (.java files)
30
+ - Python (.py files)
31
+
32
+ Limitations:
33
+ - Uses Tree-sitter AST analysis with heuristic matching (not full semantic analysis)
34
+ - Only detects references where both source and target are in affectedScope
35
+ - System classes (java.lang.*, kotlin.*, etc.) are filtered out
36
+ - Method overloading is matched by name + argument count heuristic
37
+ """
38
+
39
+ from .models import (
40
+ Reference,
41
+ ReferenceResult,
42
+ ReferenceType,
43
+ AffectedScope,
44
+ AffectedNode,
45
+ NodeType,
46
+ QualifiedNode,
47
+ )
48
+ from .extractor import extract_references
49
+
50
# Public API of the reference package: the model types imported from .models
# plus the extract_references entry point from .extractor.
__all__ = [
    "Reference",
    "ReferenceResult",
    "ReferenceType",
    "AffectedScope",
    "AffectedNode",
    "NodeType",
    "QualifiedNode",
    "extract_references",
]