diff-code-change-range 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,555 @@
1
+ """Reference analyzer using Tree-sitter AST."""
2
+
3
+ import sys
4
+ from typing import List, Optional, Dict, Set
5
+ from tree_sitter import Language, Parser, Node
6
+
7
+ from .models import Reference, ReferenceType, QualifiedNode
8
+ from .code_slicer import CodeSlicer
9
+
10
+ try:
11
+ import tree_sitter_java as ts_java
12
+ JAVA_LANGUAGE = Language(ts_java.language())
13
+ JAVA_AVAILABLE = True
14
+ except ImportError:
15
+ JAVA_AVAILABLE = False
16
+ JAVA_LANGUAGE = None
17
+
18
+ try:
19
+ import tree_sitter_kotlin as ts_kotlin
20
+ KOTLIN_LANGUAGE = Language(ts_kotlin.language())
21
+ KOTLIN_AVAILABLE = True
22
+ except ImportError:
23
+ KOTLIN_AVAILABLE = False
24
+ KOTLIN_LANGUAGE = None
25
+
26
+ try:
27
+ import tree_sitter_python as ts_python
28
+ PYTHON_LANGUAGE = Language(ts_python.language())
29
+ PYTHON_AVAILABLE = True
30
+ except ImportError:
31
+ PYTHON_AVAILABLE = False
32
+ PYTHON_LANGUAGE = None
33
+
34
+
35
class ReferenceAnalyzer:
    """Find references from one source node to a set of known target nodes.

    The analyzer parses a file with Tree-sitter (Java, Kotlin or Python,
    selected by file extension), locates the AST node covering the source
    node's line range, and reports calls, identifiers, types, instantiations
    and annotations whose simple name matches one of the target nodes.

    Matching is purely name based: no scope, import or type resolution is
    performed, so results are heuristic.
    """

    # System classes/packages to filter out
    SYSTEM_PREFIXES = {
        'java.', 'javax.', 'kotlin.', 'kotlinx.',
        'android.', 'androidx.', 'com.android.',
    }

    # Simple names treated as builtins; they are never reported as references.
    # Built once at class level instead of on every `_is_system_type` call.
    _BUILTIN_NAMES = frozenset({
        'String', 'Int', 'Long', 'Boolean', 'Double', 'Float', 'Char', 'Byte', 'Short',
        'Integer', 'Character', 'Void', 'Object', 'Class',
        'List', 'Map', 'Set', 'Collection', 'Iterable', 'Iterator',
        'ArrayList', 'HashMap', 'HashSet',
        'int', 'str', 'bool', 'float', 'list', 'dict', 'set', 'tuple',
        'Any', 'Unit', 'Nothing', 'Throwable', 'Exception',
        'true', 'false', 'null', 'println', 'print', 'repeat',
    })

    # Leaf node types whose text is a plain name.
    _NAME_NODE_TYPES = ('identifier', 'simple_identifier', 'type_identifier')

    def __init__(self, target_nodes: List[QualifiedNode], file_path: str):
        """
        Initialize analyzer with target nodes to look for.

        Args:
            target_nodes: List of nodes that can be referenced
            file_path: Path of the file being analyzed
        """
        self.target_nodes = target_nodes
        self.file_path = file_path
        self._build_target_index()

    def _build_target_index(self):
        """Build the name -> targets and file -> targets lookup tables."""
        # Index by simple name
        self.targets_by_name: Dict[str, List[QualifiedNode]] = {}
        # Index by file
        self.targets_by_file: Dict[str, List[QualifiedNode]] = {}

        for node in self.target_nodes:
            self.targets_by_name.setdefault(node.name, []).append(node)
            self.targets_by_file.setdefault(node.file_path, []).append(node)

    def analyze(
        self,
        source_code: str,
        source_node: QualifiedNode
    ) -> List[Reference]:
        """
        Analyze source code and find references to target nodes.

        Args:
            source_code: Full source code of the file
            source_node: The node being analyzed (source of references)

        Returns:
            List of references found. Empty when the file type is not
            supported, the grammar is not installed, parsing fails, or no AST
            node covers ``source_node.line_range``.
        """
        language = self._get_language()
        if not language:
            return []

        # Parse the full file rather than the snippet: the grammar needs the
        # surrounding context, and line numbers then stay absolute.
        parser = Parser(language)
        try:
            tree = parser.parse(bytes(source_code, 'utf8'))
        except Exception as e:
            # Best effort: a file that cannot be parsed yields no references.
            print(f"Warning: Failed to parse {self.file_path}: {e}", file=sys.stderr)
            return []

        target_node = self._find_node_at_line_range(tree.root_node, source_node.line_range)
        if not target_node:
            return []

        if self.file_path.endswith('.kt'):
            return self._analyze_kotlin_node(target_node, source_code, source_node)
        if self.file_path.endswith('.java'):
            return self._analyze_java_node(target_node, source_code, source_node)
        if self.file_path.endswith('.py'):
            return self._analyze_python_node(target_node, source_code, source_node)
        return []

    def _find_node_at_line_range(self, root: Node, line_range: tuple) -> Optional[Node]:
        """Find the AST node that best matches a 1-based, inclusive line range.

        The tree is descended through the first child that still covers the
        range. The first node whose own line span equals the range exactly is
        returned; if there is none, the deepest covering node is returned.

        Stopping at the exact match matters: descending further would turn a
        one-line declaration into its first token (e.g. a keyword) and a
        multi-line declaration into just its body.

        Args:
            root: Root of the subtree to search
            line_range: ``(start_line, end_line)``, 1-based and inclusive

        Returns:
            The matching node, or None if ``root`` does not cover the range
        """
        start_line, end_line = line_range

        def line_span(node):
            # Tree-sitter rows are 0-based; the range is 1-based.
            return node.start_point[0] + 1, node.end_point[0] + 1

        def covers(node):
            first, last = line_span(node)
            return first <= start_line and last >= end_line

        if not covers(root):
            return None

        current = root
        while True:
            if line_span(current) == (start_line, end_line):
                return current
            narrower = next((child for child in current.children if covers(child)), None)
            if narrower is None:
                return current
            current = narrower

    def _get_language(self) -> Optional[Language]:
        """Return the Tree-sitter language for the file, or None if unavailable."""
        if self.file_path.endswith('.java') and JAVA_AVAILABLE:
            return JAVA_LANGUAGE
        if self.file_path.endswith('.kt') and KOTLIN_AVAILABLE:
            return KOTLIN_LANGUAGE
        if self.file_path.endswith('.py') and PYTHON_AVAILABLE:
            return PYTHON_LANGUAGE
        return None

    @staticmethod
    def _iter_preorder(root: Node):
        """Yield ``root`` and its descendants in pre-order.

        Uses an explicit stack so deeply nested trees cannot exhaust the
        interpreter's recursion limit.
        """
        stack = [root]
        while stack:
            current = stack.pop()
            yield current
            # Reversed so the leftmost child is popped (visited) first.
            stack.extend(reversed(current.children))

    @staticmethod
    def _node_text(node: Node, source_code: str) -> str:
        """Return the source text of ``node``.

        Tree-sitter offsets are byte offsets into the UTF-8 encoded source, so
        slicing the ``str`` with them is wrong once the file contains a
        non-ASCII character. The node's own bytes are used when available and
        the encoded source is sliced otherwise.
        """
        raw = getattr(node, 'text', None)
        if raw is None:
            raw = source_code.encode('utf8')[node.start_byte:node.end_byte]
        return raw.decode('utf8', errors='replace')

    @staticmethod
    def _first_child_of_type(node: Node, types: tuple) -> Optional[Node]:
        """Return the first direct child whose type is in ``types``."""
        for child in node.children:
            if child.type in types:
                return child
        return None

    @staticmethod
    def _last_child_of_type(node: Node, types: tuple) -> Optional[Node]:
        """Return the last direct child whose type is in ``types``."""
        for child in reversed(node.children):
            if child.type in types:
                return child
        return None

    @staticmethod
    def _is_call_callee(node: Node) -> bool:
        """Tell whether ``node`` is the ``function`` part of a Python ``call``."""
        parent = node.parent
        if parent is None or parent.type != 'call':
            return False
        callee = parent.child_by_field_name('function')
        return (
            callee is not None
            and callee.start_byte == node.start_byte
            and callee.end_byte == node.end_byte
        )

    def _reference_for_name(
        self,
        name: Optional[str],
        node: Node,
        source_node: QualifiedNode,
        ref_type,
        *,
        skip_system: bool = False,
        arg_count: int = -1
    ) -> Optional[Reference]:
        """Build a reference to the target called ``name``, if there is one.

        Args:
            name: Simple name found in the source (may be None or empty)
            node: AST node the name was read from; supplies the line number
            source_node: The node being analyzed (source of the reference)
            ref_type: ``ReferenceType`` member to record
            skip_system: Drop names recognised by ``_is_system_type``
            arg_count: Number of call arguments, or -1 when not a call

        Returns:
            A Reference, or None when the name is missing, filtered out, or
            matches no target
        """
        if not name or (skip_system and self._is_system_type(name)):
            return None

        target = self._find_matching_target(name, arg_count)
        if not target:
            return None

        return Reference(
            source=source_node.qualified_path,
            target=target.qualified_path,
            type=ref_type,
            line=node.start_point[0] + 1  # 0-based row -> 1-based line
        )

    def _analyze_kotlin_node(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> List[Reference]:
        """Collect references found in a Kotlin AST subtree (pre-order)."""
        references = []

        for current in self._iter_preorder(node):
            node_type = current.type
            ref = None

            # Method/Function call
            if node_type == 'call_expression':
                ref = self._extract_call_reference(current, source_code, source_node, 'kotlin')

            # Field/Property access: identifiers directly under these parents
            elif node_type == 'simple_identifier':
                parent = current.parent
                if parent and parent.type in ('navigation_expression', 'assignment', 'property_delegate'):
                    ref = self._extract_identifier_reference(current, source_code, source_node)

            # Type reference
            elif node_type in ('type_reference', 'user_type'):
                ref = self._extract_type_reference(current, source_code, source_node)

            # Object literal.
            # NOTE(review): Kotlin constructor calls look like ordinary calls
            # and are presumably reported through `call_expression` above;
            # confirm this branch ever produces a match.
            elif node_type == 'object_literal':
                ref = self._extract_instantiation(current, source_code, source_node)

            # Annotation
            elif node_type == 'annotation':
                ref = self._extract_annotation_reference(current, source_code, source_node)

            if ref:
                references.append(ref)

        return references

    def _analyze_java_node(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> List[Reference]:
        """Collect references found in a Java AST subtree (pre-order)."""
        references = []

        for current in self._iter_preorder(node):
            node_type = current.type
            ref = None

            # Method call
            if node_type == 'method_invocation':
                ref = self._extract_call_reference(current, source_code, source_node, 'java')

            # Field access
            elif node_type == 'field_access':
                ref = self._extract_field_access(current, source_code, source_node)

            # Simple name that might be a field; names already handled by the
            # call / field-access branches and class names are skipped.
            elif node_type == 'identifier':
                parent = current.parent
                if parent and parent.type not in ('method_invocation', 'field_access', 'class_declaration'):
                    ref = self._extract_identifier_reference(current, source_code, source_node)

            # Type reference. Only the leaf is dispatched: a `generic_type`
            # is covered by visiting the `type_identifier` nodes inside it,
            # which also picks up type arguments and avoids reporting the
            # outer type twice.
            elif node_type == 'type_identifier':
                ref = self._extract_type_reference(current, source_code, source_node)

            # Constructor call
            elif node_type == 'object_creation_expression':
                ref = self._extract_instantiation(current, source_code, source_node)

            # Annotation; the Java grammar uses `marker_annotation` for the
            # argument-less form (e.g. `@Override`).
            elif node_type in ('annotation', 'marker_annotation'):
                ref = self._extract_annotation_reference(current, source_code, source_node)

            if ref:
                references.append(ref)

        return references

    def _analyze_python_node(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> List[Reference]:
        """Collect references found in a Python AST subtree (pre-order)."""
        references = []

        for current in self._iter_preorder(node):
            node_type = current.type
            ref = None

            # Function/Method call
            if node_type == 'call':
                ref = self._extract_call_reference(current, source_code, source_node, 'python')

            # Attribute access. The callee of `obj.method()` is already
            # reported as a method call, so it is not reported again here.
            elif node_type == 'attribute':
                if not self._is_call_callee(current):
                    ref = self._extract_attribute_access(current, source_code, source_node)

            # Simple identifier (might be a variable/field reference)
            elif node_type == 'identifier':
                parent = current.parent
                if parent and parent.type not in ('call', 'attribute', 'function_definition', 'class_definition'):
                    ref = self._extract_identifier_reference(current, source_code, source_node)

            if ref:
                references.append(ref)

        return references

    def _call_name_node(self, node: Node) -> Optional[Node]:
        """Return the node holding the simple name of the called function."""
        # Java `method_invocation` exposes the method name as the `name`
        # field, which skips the receiver in `receiver.method()`.
        name_node = node.child_by_field_name('name')
        if name_node is not None:
            return name_node

        # Python `call` exposes the callee as `function`; for `obj.method()`
        # the callee is an `attribute` node whose `attribute` field is the name.
        callee = node.child_by_field_name('function')
        if callee is not None:
            if callee.type == 'attribute':
                return callee.child_by_field_name('attribute')
            if callee.type in self._NAME_NODE_TYPES:
                return callee
            return None

        # Anything else (e.g. Kotlin `call_expression`): first direct
        # identifier child. Qualified Kotlin calls are not resolved here.
        return self._first_child_of_type(node, ('identifier', 'simple_identifier'))

    def _extract_call_reference(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode,
        language: str
    ) -> Optional[Reference]:
        """Extract method/function call reference.

        Args:
            node: Call node (`call_expression`, `method_invocation` or `call`)
            source_code: Full source code of the file
            source_node: The node being analyzed (source of the reference)
            language: Grammar name; kept for interface compatibility, the
                callee is located through grammar fields instead

        Returns:
            A METHOD_CALL reference, or None if the callee has no simple name
            or matches no target
        """
        name_node = self._call_name_node(node)
        if name_node is None:
            return None

        # The argument list follows the callee, so it is looked up separately
        # rather than in the same scan that finds the name.
        arguments = node.child_by_field_name('arguments')
        if arguments is None:
            arguments = self._first_child_of_type(node, ('value_arguments', 'argument_list'))
        arg_count = self._count_arguments(arguments) if arguments is not None else 0

        return self._reference_for_name(
            self._node_text(name_node, source_code),
            node,
            source_node,
            ReferenceType.METHOD_CALL,
            arg_count=arg_count
        )

    def _extract_identifier_reference(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> Optional[Reference]:
        """Extract a FIELD_ACCESS reference from a bare identifier node."""
        return self._reference_for_name(
            self._node_text(node, source_code),
            node,
            source_node,
            ReferenceType.FIELD_ACCESS,
            skip_system=True
        )

    def _extract_field_access(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> Optional[Reference]:
        """Extract field access (Java style).

        The accessed member is the `field` child; in `obj.value` the first
        identifier is the object, not the field.
        """
        field_node = node.child_by_field_name('field')
        if field_node is None:
            field_node = self._last_child_of_type(node, ('identifier',))
        if field_node is None:
            return None

        return self._reference_for_name(
            self._node_text(field_node, source_code),
            node,
            source_node,
            ReferenceType.FIELD_ACCESS
        )

    def _extract_attribute_access(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> Optional[Reference]:
        """Extract attribute access (Python style).

        The accessed member is the `attribute` child; in `obj.value` the
        first identifier is the object, not the attribute.
        """
        attr_node = node.child_by_field_name('attribute')
        if attr_node is None:
            attr_node = self._last_child_of_type(node, ('identifier',))
        if attr_node is None:
            return None

        return self._reference_for_name(
            self._node_text(attr_node, source_code),
            node,
            source_node,
            ReferenceType.FIELD_ACCESS
        )

    def _extract_type_reference(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> Optional[Reference]:
        """Extract type reference.

        Accepts either a leaf name node (its own text is the type name) or a
        composite type node (the first name child is used).
        """
        if node.type in self._NAME_NODE_TYPES:
            name_node = node
        else:
            name_node = self._first_child_of_type(node, self._NAME_NODE_TYPES)
        if name_node is None:
            return None

        return self._reference_for_name(
            self._node_text(name_node, source_code),
            node,
            source_node,
            ReferenceType.TYPE_REFERENCE,
            skip_system=True
        )

    def _extract_instantiation(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> Optional[Reference]:
        """Extract object instantiation reference from a creation node."""
        class_node = self._first_child_of_type(
            node, ('identifier', 'simple_identifier', 'type_identifier', 'user_type')
        )
        if class_node is None:
            return None

        return self._reference_for_name(
            self._node_text(class_node, source_code),
            node,
            source_node,
            ReferenceType.INSTANTIATION,
            skip_system=True
        )

    def _extract_annotation_reference(
        self,
        node: Node,
        source_code: str,
        source_node: QualifiedNode
    ) -> Optional[Reference]:
        """Extract annotation reference from an annotation node."""
        name_node = self._first_child_of_type(
            node, ('identifier', 'simple_identifier', 'user_type')
        )
        if name_node is None:
            return None

        return self._reference_for_name(
            self._node_text(name_node, source_code),
            node,
            source_node,
            ReferenceType.ANNOTATION
        )

    def _find_matching_target(self, name: str, arg_count: int = -1) -> Optional[QualifiedNode]:
        """Find target node matching the given name.

        With several candidates, those declared in the analyzed file are
        preferred; the first remaining candidate wins.

        Args:
            name: Simple name to look up
            arg_count: Number of call arguments, or -1 when unknown.
                Accepted for overload disambiguation, which is not
                implemented yet, so it currently does not affect the result.

        Returns:
            The chosen target, or None if no target has that name
        """
        candidates = self.targets_by_name.get(name)
        if not candidates:
            return None

        if len(candidates) > 1:
            same_file = [c for c in candidates if c.file_path == self.file_path]
            if same_file:
                candidates = same_file

        # TODO: use arg_count to choose between overloads.
        return candidates[0]

    def _count_arguments(self, node: Node) -> int:
        """Count arguments in an argument list.

        Every named, non-comment child counts as one argument. This covers
        grammars that wrap arguments (e.g. `value_argument`) as well as those
        that place expressions directly in the list.
        """
        return sum(
            1 for child in node.children
            if child.is_named and 'comment' not in child.type
        )

    def _is_system_type(self, type_name: str) -> bool:
        """Check if type is a system/builtin type.

        True for the known builtin simple names and for any name starting
        with one of ``SYSTEM_PREFIXES``.
        """
        if type_name in self._BUILTIN_NAMES:
            return True
        return type_name.startswith(tuple(self.SYSTEM_PREFIXES))
@@ -0,0 +1,58 @@
1
+ """Code slicer for extracting code snippets by line range."""
2
+
3
+ from typing import Optional, Tuple
4
+
5
+
6
class CodeSlicer:
    """Pull individual lines or line ranges out of a source string."""

    @staticmethod
    def extract(source_code: str, line_range: Tuple[int, int]) -> str:
        """
        Return the lines of ``source_code`` inside ``line_range``.

        Lines are split on ``'\\n'`` only. A range reaching outside the
        source is clamped to the available lines.

        Args:
            source_code: The full source code
            line_range: Tuple of (start_line, end_line) - 1-based, inclusive

        Returns:
            The selected lines joined with ``'\\n'``; an empty string when the
            source is empty or the clamped range contains no line
        """
        if not source_code:
            return ""

        all_lines = source_code.split('\n')
        first, last = line_range

        # Clamp to the lines that actually exist.
        first = max(first, 1)
        last = min(last, len(all_lines))
        if first > last:
            return ""

        # 1-based inclusive range -> 0-based half-open slice.
        return '\n'.join(all_lines[first - 1:last])

    @staticmethod
    def get_line_at(source_code: str, line_number: int) -> Optional[str]:
        """
        Return one line of ``source_code``.

        Args:
            source_code: The full source code
            line_number: 1-based line number

        Returns:
            The text of that line, or None when the source is empty or the
            line number is out of bounds
        """
        if not source_code or line_number < 1:
            return None

        all_lines = source_code.split('\n')
        return all_lines[line_number - 1] if line_number <= len(all_lines) else None
@@ -0,0 +1,80 @@
1
+ """Reference differ for computing added and removed references."""
2
+
3
+ from typing import List, Set
4
+ from .models import Reference, ReferenceResult
5
+
6
+
7
class ReferenceDiffer:
    """Computes differences between before and after reference sets."""

    @staticmethod
    def _identity(ref: Reference) -> tuple:
        """Return the (source, target, type) key that identifies a reference."""
        return (ref.source, ref.target, ref.type)

    @staticmethod
    def _unique_refs_not_in(refs: List[Reference], excluded_keys: Set[tuple]) -> List[Reference]:
        """Return the references whose identity is not in ``excluded_keys``.

        Each identity is kept once, in order of first appearance in ``refs``,
        so the result does not depend on hash iteration order.
        """
        seen = set()
        kept = []
        for ref in refs:
            key = ReferenceDiffer._identity(ref)
            if key in excluded_keys or key in seen:
                continue
            seen.add(key)
            kept.append(ref)
        return kept

    @staticmethod
    def compute_diff(
        before_refs: List[Reference],
        after_refs: List[Reference]
    ) -> ReferenceResult:
        """
        Compute added and removed references.

        Two references are considered equal if they have the same source,
        target, and type. Line numbers are ignored. The comparison is done on
        that key directly, so it does not depend on how ``Reference`` defines
        equality or hashing.

        Args:
            before_refs: References from before version
            after_refs: References from after version

        Returns:
            ReferenceResult with before, after, added, and removed references.
            Added and removed references contain each identity once, ordered
            by first appearance in ``after_refs`` / ``before_refs``.
        """
        before_keys = {ReferenceDiffer._identity(ref) for ref in before_refs}
        after_keys = {ReferenceDiffer._identity(ref) for ref in after_refs}

        added_list = ReferenceDiffer._restore_line_numbers(
            ReferenceDiffer._unique_refs_not_in(after_refs, before_keys),
            after_refs
        )
        removed_list = ReferenceDiffer._restore_line_numbers(
            ReferenceDiffer._unique_refs_not_in(before_refs, after_keys),
            before_refs
        )

        return ReferenceResult(
            before_references=before_refs,
            after_references=after_refs,
            added_references=added_list,
            removed_references=removed_list
        )

    @staticmethod
    def _restore_line_numbers(
        diff_refs: List[Reference],
        original_refs: List[Reference]
    ) -> List[Reference]:
        """
        Rebuild references with line numbers taken from the original list.

        When an identity occurs several times in ``original_refs``, the line
        of its last occurrence is used. A reference whose identity is absent
        from ``original_refs`` keeps its own line.

        Args:
            diff_refs: References to rebuild
            original_refs: References supplying the line numbers

        Returns:
            New Reference objects, in the order of ``diff_refs``
        """
        # Later occurrences overwrite earlier ones, i.e. last line wins.
        line_by_key = {
            (ref.source, ref.target, ref.type): ref.line
            for ref in original_refs
        }

        return [
            Reference(
                source=ref.source,
                target=ref.target,
                type=ref.type,
                line=line_by_key.get((ref.source, ref.target, ref.type), ref.line)
            )
            for ref in diff_refs
        ]