diff-code-change-range 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diff_code_change_range/__init__.py +17 -0
- diff_code_change_range/__main__.py +7 -0
- diff_code_change_range/affected_marker.py +173 -0
- diff_code_change_range/cli.py +167 -0
- diff_code_change_range/diff_parser.py +218 -0
- diff_code_change_range/reference/__init__.py +59 -0
- diff_code_change_range/reference/analyzer.py +555 -0
- diff_code_change_range/reference/code_slicer.py +58 -0
- diff_code_change_range/reference/differ.py +80 -0
- diff_code_change_range/reference/extractor.py +130 -0
- diff_code_change_range/reference/models.py +85 -0
- diff_code_change_range/reference/scope_parser.py +79 -0
- diff_code_change_range/structure_extractor.py +750 -0
- diff_code_change_range/yaml_reporter.py +89 -0
- diff_code_change_range-0.0.1.dist-info/METADATA +386 -0
- diff_code_change_range-0.0.1.dist-info/RECORD +19 -0
- diff_code_change_range-0.0.1.dist-info/WHEEL +5 -0
- diff_code_change_range-0.0.1.dist-info/entry_points.txt +2 -0
- diff_code_change_range-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,750 @@
|
|
|
1
|
+
"""Code structure extractor using tree-sitter for Java, Kotlin, and Python."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import List, Optional, Dict, Any, Callable
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
import tree_sitter
|
|
9
|
+
from tree_sitter import Language, Parser
|
|
10
|
+
|
|
11
|
+
# Grammar packages are optional. Every language starts out disabled and is
# switched on only when its tree-sitter grammar package imports successfully.
JAVA_LANGUAGE = None
JAVA_AVAILABLE = False
try:
    import tree_sitter_java as ts_java
    JAVA_LANGUAGE = Language(ts_java.language())
    JAVA_AVAILABLE = True
except ImportError:
    pass  # grammar package missing: keep the disabled defaults

KOTLIN_LANGUAGE = None
KOTLIN_AVAILABLE = False
try:
    import tree_sitter_kotlin as ts_kotlin
    KOTLIN_LANGUAGE = Language(ts_kotlin.language())
    KOTLIN_AVAILABLE = True
except ImportError:
    pass  # grammar package missing: keep the disabled defaults

PYTHON_LANGUAGE = None
PYTHON_AVAILABLE = False
try:
    import tree_sitter_python as ts_python
    PYTHON_LANGUAGE = Language(ts_python.language())
    PYTHON_AVAILABLE = True
except ImportError:
    pass  # grammar package missing: keep the disabled defaults
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class NodeType(Enum):
    """Kinds of nodes that can appear in an extracted code structure tree."""
    FILE = "file"  # Root node built by extract_structure for a whole file
    CLASS = "class"  # Java, Kotlin and Python classes
    INTERFACE = "interface"  # Java interfaces
    OBJECT = "object"  # Kotlin object declarations
    ENUM = "enum"  # Java enums and Kotlin classes that have an enum_class_body
    FUNCTION = "function"  # Kotlin top-level and Python module-level functions
    METHOD = "method"  # Functions inside a class body, including Java constructors
    MEMBER = "member"  # Java fields, Kotlin properties, Python class/instance variables
    VARIABLE = "variable"  # Module-level variables (Python)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class CodeNode:
    """Represents a node in the code structure tree.

    extract_structure returns a FILE node; the declarations found in the file
    hang below it through ``children``.
    """
    name: str  # Display name: the file path for FILE nodes, otherwise derived from the identifier
    node_type: NodeType  # Kind of construct this node represents
    line_range: tuple  # (start_line, end_line) - 1-based, inclusive
    children: List['CodeNode'] = field(default_factory=list)  # Nested declarations
    is_affected: bool = False  # Never set in this module; presumably filled in by a later marking step - TODO confirm
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extract_structure(source_code: str, file_path: str) -> Optional[CodeNode]:
    """
    Build the structure tree of a single source file with tree-sitter.

    Args:
        source_code: Full text of the file to analyse
        file_path: Path of the file; its extension selects the grammar

    Returns:
        A FILE CodeNode whose children are the top-level declarations, or
        None when the source is empty, the file type is unsupported (a
        warning is printed to stderr), or parsing raises (also warned)
    """
    if not source_code:
        return None

    grammar = _get_language_for_file(file_path)
    if not grammar:
        print(f"Warning: Unsupported file type: {file_path}", file=sys.stderr)
        return None

    parser = Parser(grammar)

    try:
        tree = parser.parse(source_code.encode('utf8'))
    except Exception as exc:
        print(f"Warning: Failed to parse {file_path}: {exc}", file=sys.stderr)
        return None

    root = tree.root_node

    # Pick the language-specific extractor from the file extension
    extractors = (
        ('.java', _extract_java_file_nodes),
        ('.kt', _extract_kotlin_file_nodes),
        ('.py', _extract_python_file_nodes),
    )
    for suffix, extract in extractors:
        if file_path.endswith(suffix):
            top_level = extract(root, source_code)
            break
    else:
        return None

    # tree-sitter rows are 0-based; CodeNode ranges are 1-based. A root with
    # no children falls back to counting the lines of the raw text.
    if root.child_count > 0:
        span = (root.start_point[0] + 1, root.end_point[0] + 1)
    else:
        span = (1, source_code.count('\n') + 1)

    return CodeNode(
        name=file_path,
        node_type=NodeType.FILE,
        line_range=span,
        children=top_level,
    )
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _get_language_for_file(file_path: str) -> Optional[Language]:
    """Return the grammar for the file's extension, or None if unsupported or not installed."""
    candidates = (
        ('.java', JAVA_AVAILABLE, JAVA_LANGUAGE),
        ('.kt', KOTLIN_AVAILABLE, KOTLIN_LANGUAGE),
        ('.py', PYTHON_AVAILABLE, PYTHON_LANGUAGE),
    )
    for suffix, available, grammar in candidates:
        if file_path.endswith(suffix) and available:
            return grammar
    return None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _extract_java_file_nodes(root_node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Collect the top-level classes, interfaces and enums of a Java file."""
    # field_declaration is intentionally absent: fields are only collected
    # while extracting the class that contains them.
    handlers = {
        'class_declaration': _extract_java_class,
        'interface_declaration': _extract_java_interface,
        'enum_declaration': _extract_java_enum,
    }

    found = []
    for child in root_node.children:
        handler = handlers.get(child.type)
        if handler is None:
            continue
        extracted = handler(child, source_code)
        if extracted:
            found.append(extracted)
    return found
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _extract_java_class(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a CLASS node, with its members as children, from a Java class declaration."""
    # tree-sitter rows are 0-based; CodeNode line ranges are 1-based
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    return CodeNode(
        name=_extract_node_name(node, source_code),
        node_type=NodeType.CLASS,
        line_range=(first_line, last_line),
        children=_extract_java_class_children(node, source_code),
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _extract_java_interface(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build an INTERFACE node from a Java interface declaration.

    Children come from the same helper that is used for classes.
    """
    # tree-sitter rows are 0-based; CodeNode line ranges are 1-based
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    return CodeNode(
        name=_extract_node_name(node, source_code),
        node_type=NodeType.INTERFACE,
        line_range=(first_line, last_line),
        children=_extract_java_class_children(node, source_code),
    )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _extract_java_enum(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build an ENUM node from a Java enum declaration.

    Children come from the same helper that is used for classes.
    """
    # tree-sitter rows are 0-based; CodeNode line ranges are 1-based
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    return CodeNode(
        name=_extract_node_name(node, source_code),
        node_type=NodeType.ENUM,
        line_range=(first_line, last_line),
        children=_extract_java_class_children(node, source_code),
    )
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _extract_java_class_children(class_node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Extract the members of a Java class, interface or enum declaration.

    This helper is shared by the class, interface and enum extractors, so it
    has to recognise all three body node types of the Java grammar, not just
    ``class_body``.

    Args:
        class_node: A class_declaration, interface_declaration or
            enum_declaration node
        source_code: Source text the tree was parsed from

    Returns:
        CodeNodes for methods, constructors, fields/constants and nested
        types, in source order; an empty list when no body is found
    """
    body_node = None
    for child in class_node.children:
        if child.type in ('class_body', 'interface_body', 'enum_body'):
            body_node = child
            break
    if body_node is None:
        return []

    # In an enum the declarations that follow the constants are wrapped in an
    # enum_body_declarations node; splice them in so they are treated like
    # ordinary class members.
    declarations = []
    for child in body_node.children:
        if child.type == 'enum_body_declarations':
            declarations.extend(child.children)
        else:
            declarations.append(child)

    nested_extractors = {
        'class_declaration': _extract_java_class,
        'interface_declaration': _extract_java_interface,
        'enum_declaration': _extract_java_enum,
    }

    children = []
    for declaration in declarations:
        kind = declaration.type
        # Interface constants are constant_declaration nodes; they hold
        # variable_declarator children just like field_declaration does.
        if kind in ('field_declaration', 'constant_declaration'):
            children.extend(_extract_java_fields(declaration, source_code))
            continue

        if kind == 'method_declaration':
            extracted = _extract_java_method(declaration, source_code)
        elif kind == 'constructor_declaration':
            extracted = _extract_java_constructor(declaration, source_code)
        elif kind in nested_extractors:
            extracted = nested_extractors[kind](declaration, source_code)
        else:
            continue

        if extracted:
            children.append(extracted)

    return children
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _extract_java_method(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a childless METHOD node from a Java method declaration."""
    # tree-sitter rows are 0-based; CodeNode line ranges are 1-based
    span = (node.start_point[0] + 1, node.end_point[0] + 1)
    method_name = _extract_node_name(node, source_code)
    return CodeNode(name=method_name, node_type=NodeType.METHOD, line_range=span)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _extract_java_constructor(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a METHOD node from a Java constructor declaration.

    Constructors have no node type of their own and are reported as methods.
    """
    # tree-sitter rows are 0-based; CodeNode line ranges are 1-based
    span = (node.start_point[0] + 1, node.end_point[0] + 1)
    constructor_name = _extract_node_name(node, source_code)
    return CodeNode(name=constructor_name, node_type=NodeType.METHOD, line_range=span)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _extract_java_fields(node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Return one MEMBER node per variable_declarator in a Java field declaration."""
    # Every declarator of the statement reports the line range of the whole
    # declaration rather than its own position.
    span = (node.start_point[0] + 1, node.end_point[0] + 1)
    return [
        CodeNode(
            name=_extract_node_name(declarator, source_code),
            node_type=NodeType.MEMBER,
            line_range=span,
        )
        for declarator in node.children
        if declarator.type == 'variable_declarator'
    ]
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _extract_kotlin_file_nodes(root_node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Collect the top-level classes, objects, functions and properties of a Kotlin file."""
    # Declarations that produce exactly one node each
    single_node_handlers = {
        'class_declaration': _extract_kotlin_class,
        'object_declaration': _extract_kotlin_object,
        'function_declaration': _extract_kotlin_function,
    }

    found = []
    for child in root_node.children:
        kind = child.type
        if kind == 'property_declaration':
            # A property declaration may produce several nodes
            found.extend(_extract_kotlin_properties(child, source_code))
        elif kind in single_node_handlers:
            extracted = single_node_handlers[kind](child, source_code)
            if extracted:
                found.append(extracted)
    return found
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _extract_kotlin_class(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a CLASS node, or an ENUM node for enum classes, from a Kotlin class declaration."""
    class_name = _extract_node_name(node, source_code)
    members = _extract_kotlin_class_children(node, source_code)

    # A class is reported as an enum when it has an enum_class_body child
    is_enum = any(child.type == 'enum_class_body' for child in node.children)
    kind = NodeType.ENUM if is_enum else NodeType.CLASS

    span = (node.start_point[0] + 1, node.end_point[0] + 1)
    return CodeNode(name=class_name, node_type=kind, line_range=span, children=members)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _extract_kotlin_object(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build an OBJECT node, with its members as children, from a Kotlin object declaration."""
    # tree-sitter rows are 0-based; CodeNode line ranges are 1-based
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    return CodeNode(
        name=_extract_node_name(node, source_code),
        node_type=NodeType.OBJECT,
        line_range=(first_line, last_line),
        children=_extract_kotlin_class_children(node, source_code),
    )
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _extract_kotlin_function(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a FUNCTION node from a Kotlin function declaration.

    The node is always typed FUNCTION here; the class-member extractor
    retypes it to METHOD for functions found inside a class body.
    """
    span = (node.start_point[0] + 1, node.end_point[0] + 1)
    function_name = _extract_node_name(node, source_code)
    return CodeNode(name=function_name, node_type=NodeType.FUNCTION, line_range=span)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _extract_kotlin_properties(node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Return MEMBER nodes for the variables declared by a Kotlin property declaration."""
    # All resulting nodes report the span of the whole property declaration
    span = (node.start_point[0] + 1, node.end_point[0] + 1)

    found = [
        CodeNode(
            name=_extract_node_name(declaration, source_code),
            node_type=NodeType.MEMBER,
            line_range=span,
        )
        for declaration in node.children
        if declaration.type == 'variable_declaration'
    ]
    if found:
        return found

    # No variable_declaration child: fall back to naming the property after
    # the property_declaration node itself
    fallback_name = _extract_node_name(node, source_code)
    if fallback_name:
        found.append(CodeNode(
            name=fallback_name,
            node_type=NodeType.MEMBER,
            line_range=span,
        ))
    return found
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _extract_kotlin_class_children(class_node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Collect methods, properties and nested classes/objects from a Kotlin class or object body."""
    body_types = ('class_body', 'enum_class_body', 'object_body')
    body = next((c for c in class_node.children if c.type in body_types), None)
    if not body:
        return []

    members = []
    for member in body.children:
        kind = member.type
        if kind == 'function_declaration':
            method = _extract_kotlin_function(member, source_code)
            if method:
                # The shared extractor types it FUNCTION; inside a body it is a method
                method.node_type = NodeType.METHOD
                members.append(method)
        elif kind == 'property_declaration':
            members.extend(_extract_kotlin_properties(member, source_code))
        elif kind == 'class_declaration':
            nested = _extract_kotlin_class(member, source_code)
            if nested:
                members.append(nested)
        elif kind == 'object_declaration':
            nested = _extract_kotlin_object(member, source_code)
            if nested:
                members.append(nested)
    return members
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _extract_node_name(node: tree_sitter.Node, source_code: str) -> str:
    """Return the text of the first direct ``identifier`` child of a node.

    Args:
        node: Declaration node whose name is wanted
        source_code: Source text the tree was parsed from

    Returns:
        The identifier text, or "<anonymous>" when the node has no direct
        child of type ``identifier``
    """
    for child in node.children:
        if child.type != 'identifier':
            continue
        # start_byte/end_byte are offsets into the UTF-8 *bytes* that were
        # parsed, so slicing the str with them returns the wrong text as soon
        # as a non-ASCII character appears earlier in the file. Read the
        # node's own bytes instead; the documented None case of Node.text
        # falls back to slicing the encoded source.
        raw = child.text
        if raw is None:
            raw = source_code.encode('utf8')[child.start_byte:child.end_byte]
        return raw.decode('utf8')

    return "<anonymous>"
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
# ============================================================================
|
|
436
|
+
# Python Extraction Functions
|
|
437
|
+
# ============================================================================
|
|
438
|
+
|
|
439
|
+
def _extract_python_file_nodes(root_node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Extract top-level nodes from Python source.

    Picks up classes, functions and module-level assignments. Classes and
    functions wrapped in decorators arrive as ``decorated_definition`` nodes
    and are unwrapped rather than skipped.

    Args:
        root_node: Root node of the parsed module
        source_code: Source text the tree was parsed from

    Returns:
        CodeNodes for the top-level declarations, in source order
    """
    nodes = []
    for child in root_node.children:
        kind = child.type
        if kind == 'class_definition':
            extracted = _extract_python_class(child, source_code)
        elif kind == 'function_definition':
            extracted = _extract_python_function(child, source_code)
        elif kind == 'decorated_definition':
            extracted = _extract_python_decorated_definition(child, source_code)
        elif kind == 'expression_statement':
            # Module-level assignments are wrapped in an expression_statement
            extracted = _extract_python_module_variable(child, source_code)
        elif kind == 'assignment':
            extracted = _extract_python_variable_from_assignment(child, source_code)
        else:
            continue
        if extracted:
            nodes.append(extracted)
    return nodes


def _extract_python_decorated_definition(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Extract the class or function wrapped by a top-level decorated_definition."""
    for child in node.children:
        if child.type == 'class_definition':
            extracted = _extract_python_class(child, source_code)
        elif child.type == 'function_definition':
            extracted = _extract_python_function(child, source_code)
        else:
            continue
        if extracted:
            # Report the span of the whole decorated definition so that the
            # decorator lines belong to this node as well
            extracted.line_range = (node.start_point[0] + 1, node.end_point[0] + 1)
        return extracted
    return None
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _extract_python_class(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a CLASS node, with its members as children, from a Python class definition."""
    # tree-sitter rows are 0-based; CodeNode line ranges are 1-based
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    return CodeNode(
        name=_extract_node_name(node, source_code),
        node_type=NodeType.CLASS,
        line_range=(first_line, last_line),
        children=_extract_python_class_children(node, source_code),
    )
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def _extract_python_function(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a childless FUNCTION node from a module-level Python function definition."""
    span = (node.start_point[0] + 1, node.end_point[0] + 1)
    # The name carries an "async " prefix for coroutine functions
    display_name = _build_python_function_name(node, source_code)
    return CodeNode(name=display_name, node_type=NodeType.FUNCTION, line_range=span)
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _build_python_function_name(node: tree_sitter.Node, source_code: str) -> str:
    """Return the function's name, prefixed with "async " for async functions."""
    # The function counts as async only if an 'async' token is seen before
    # the 'def' token; scanning stops at whichever of the two comes first.
    first_keyword = next(
        (child.type for child in node.children if child.type in ('async', 'def')),
        None,
    )

    base_name = _extract_node_name(node, source_code)
    return f"async {base_name}" if first_keyword == 'async' else base_name
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def _extract_python_module_variable(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Return the variable defined by an expression_statement, or None if it holds no assignment."""
    # Only the first assignment child is considered
    assignment = next(
        (child for child in node.children if child.type == 'assignment'),
        None,
    )
    if assignment is None:
        return None
    return _extract_python_variable_from_assignment(assignment, source_code)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _extract_python_variable_from_assignment(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Extract the variable bound by an assignment node.

    Only the assignment target (the ``left`` field) is inspected. Scanning
    every child would also match a name on the right-hand side, so a
    statement such as ``obj.attr = value`` would be reported as a variable
    called ``value``.

    Args:
        node: An ``assignment`` node
        source_code: Source text the tree was parsed from

    Returns:
        A VARIABLE CodeNode spanning the whole assignment, or None when the
        target is not a plain name or a tuple of names (for example
        ``obj.attr = x`` or ``items[0] = x``)
    """
    target = node.child_by_field_name('left')
    if target is None:
        return None

    if target.type in ('identifier', 'pattern'):
        name_node = target
    elif target.type in ('pattern_list', 'tuple_pattern'):
        # Tuple unpacking (a, b = 1, 2): report the first plain name as primary
        name_node = next(
            (child for child in target.children if child.type == 'identifier'),
            None,
        )
    else:
        name_node = None

    if name_node is None:
        return None

    # Node offsets are byte offsets into the UTF-8 encoded source, so they
    # must not be used to slice the str; read the node's own bytes instead
    # and only fall back to the encoded source if they are unavailable.
    raw = name_node.text
    if raw is None:
        raw = source_code.encode('utf8')[name_node.start_byte:name_node.end_byte]

    return CodeNode(
        name=raw.decode('utf8'),
        node_type=NodeType.VARIABLE,
        line_range=(node.start_point[0] + 1, node.end_point[0] + 1)
    )
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def _extract_python_class_children(class_node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Collect methods, nested classes and class-level variables from a Python class body."""
    # The class body is the first 'block' child of the class definition
    body = next((c for c in class_node.children if c.type == 'block'), None)
    if not body:
        return []

    members = []
    for statement in body.children:
        kind = statement.type
        if kind == 'function_definition':
            found = _extract_python_method(statement, source_code)
        elif kind == 'decorated_definition':
            found = _extract_python_decorated_method(statement, source_code)
        elif kind == 'class_definition':
            found = _extract_python_class(statement, source_code)
        elif kind == 'expression_statement':
            found = _extract_python_class_variable(statement, source_code)
        elif kind == 'assignment':
            found = _extract_python_variable_from_assignment(statement, source_code)
            if found:
                # The shared helper types it VARIABLE; in a class body it is a member
                found.node_type = NodeType.MEMBER
        else:
            continue

        if found:
            members.append(found)

    return members
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def _extract_python_decorated_method(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Extract the method or nested class wrapped by a decorated_definition.

    Args:
        node: A ``decorated_definition`` node found in a class body
        source_code: Source text the tree was parsed from

    Returns:
        A METHOD node for a decorated function, a CLASS node for a decorated
        nested class (previously dropped), or None when the node wraps
        neither
    """
    for child in node.children:
        if child.type == 'function_definition':
            # Pass the decorated node itself: the method extractor unwraps it
            # and uses its range, so name and span cover the decorators
            return _extract_python_method(node, source_code)
        if child.type == 'class_definition':
            inner_class = _extract_python_class(child, source_code)
            if inner_class:
                # Span the whole decorated definition, matching what is done
                # for decorated methods
                inner_class.line_range = (node.start_point[0] + 1, node.end_point[0] + 1)
            return inner_class
    return None
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def _extract_python_method(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Build a METHOD node whose children are the ``self.x`` assignments in its body.

    Accepts either a function_definition or the decorated_definition wrapping
    one; the line range is always that of the node passed in.
    """
    # For a decorated definition the body lives in the wrapped function
    definition = node
    if node.type == 'decorated_definition':
        definition = next(
            (child for child in node.children if child.type == 'function_definition'),
            node,
        )

    return CodeNode(
        name=_build_decorated_name(node, source_code),
        node_type=NodeType.METHOD,
        line_range=(node.start_point[0] + 1, node.end_point[0] + 1),
        children=_extract_instance_variables(definition, source_code),
    )
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
def _build_decorated_name(node: tree_sitter.Node, source_code: str) -> str:
    """Compose a method's display name from its decorators, async marker and identifier.

    The parts are joined with spaces, e.g. "@a,@b async name"; decorator and
    async parts are omitted when absent.
    """
    wrapped = node
    decorator_names = []

    if node.type == 'decorated_definition':
        # The function itself is a child of the decorated definition
        wrapped = next(
            (child for child in node.children if child.type == 'function_definition'),
            node,
        )
        labels = [
            _extract_decorator_name(child, source_code)
            for child in node.children
            if child.type == 'decorator'
        ]
        # Decorators whose name could not be determined are left out
        decorator_names = [label for label in labels if label]

    has_async = any(child.type == 'async' for child in wrapped.children)
    identifier = _extract_node_name(wrapped, source_code)

    parts = []
    if decorator_names:
        parts.append('@' + ',@'.join(decorator_names))
    if has_async:
        parts.append('async')
    parts.append(identifier)

    return ' '.join(parts)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _extract_decorator_name(decorator_node: tree_sitter.Node, source_code: str) -> str:
    """Extract a decorator's name, without the leading ``@`` or call arguments.

    Args:
        decorator_node: A ``decorator`` node
        source_code: Source text the tree was parsed from

    Returns:
        ``name`` for ``@name`` and ``@name(...)``, the dotted path for
        ``@pkg.name`` and ``@pkg.name(...)``, or "" when no name is found
    """
    def _text(target):
        # Node offsets are byte offsets into the UTF-8 encoded source, so the
        # str must not be sliced with them; use the node's own bytes and fall
        # back to the encoded source only if they are unavailable.
        raw = target.text
        if raw is None:
            raw = source_code.encode('utf8')[target.start_byte:target.end_byte]
        return raw.decode('utf8')

    name_types = ('identifier', 'attribute')
    for child in decorator_node.children:
        if child.type in name_types:
            return _text(child)
        if child.type == 'call':
            # Decorator with arguments: the callee may be a plain name or a
            # dotted attribute such as app.route
            for subchild in child.children:
                if subchild.type in name_types:
                    return _text(subchild)
    return ""
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def _extract_python_class_variable(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Return a MEMBER node for the first extractable assignment in an expression_statement."""
    for child in node.children:
        if child.type != 'assignment':
            continue
        member = _extract_python_variable_from_assignment(child, source_code)
        if not member:
            continue
        # The shared helper types it VARIABLE; in a class body it is a member
        member.node_type = NodeType.MEMBER
        return member
    return None
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _extract_instance_variables(node: tree_sitter.Node, source_code: str) -> List[CodeNode]:
    """Collect ``self.x`` assignments made by the top-level statements of a method body."""
    # The method body is the first 'block' child of the function definition
    body = next((child for child in node.children if child.type == 'block'), None)
    if not body:
        return []

    # First gather candidate assignment nodes in source order; statements
    # nested inside other blocks of the body are not visited
    assignments = []
    for statement in body.children:
        if statement.type == 'expression_statement':
            assignments.extend(
                part for part in statement.children if part.type == 'assignment'
            )
        elif statement.type == 'assignment':
            assignments.append(statement)

    found = []
    for assignment in assignments:
        variable = _extract_self_assignment(assignment, source_code)
        if variable:
            found.append(variable)
    return found
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
def _extract_self_assignment(node: tree_sitter.Node, source_code: str) -> Optional[CodeNode]:
    """Extract the instance variable defined by a ``self.x = ...`` assignment.

    Only the assignment target (the ``left`` field) is inspected. Scanning
    every child would also match an attribute on the right-hand side, so
    ``value = self.config`` would be reported as defining ``config``.

    Args:
        node: An ``assignment`` node
        source_code: Source text the tree was parsed from

    Returns:
        A MEMBER CodeNode named after the attribute and spanning the whole
        assignment, or None when the target is not an attribute accessed
        directly on ``self``
    """
    target = node.child_by_field_name('left')
    if target is None or target.type != 'attribute':
        return None

    receiver = target.child_by_field_name('object')
    attribute = target.child_by_field_name('attribute')
    if receiver is None or attribute is None or receiver.type != 'identifier':
        return None

    def _text(part):
        # Node offsets are byte offsets into the UTF-8 encoded source, so the
        # str must not be sliced with them; use the node's own bytes and fall
        # back to the encoded source only if they are unavailable.
        raw = part.text
        if raw is None:
            raw = source_code.encode('utf8')[part.start_byte:part.end_byte]
        return raw.decode('utf8')

    if _text(receiver) != 'self':
        return None

    attr_name = _text(attribute)
    if not attr_name:
        return None

    return CodeNode(
        name=attr_name,
        node_type=NodeType.MEMBER,
        line_range=(node.start_point[0] + 1, node.end_point[0] + 1)
    )
|