roma-debug 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- roma_debug/__init__.py +3 -0
- roma_debug/config.py +79 -0
- roma_debug/core/__init__.py +5 -0
- roma_debug/core/engine.py +423 -0
- roma_debug/core/models.py +313 -0
- roma_debug/main.py +753 -0
- roma_debug/parsers/__init__.py +21 -0
- roma_debug/parsers/base.py +189 -0
- roma_debug/parsers/python_ast_parser.py +268 -0
- roma_debug/parsers/registry.py +196 -0
- roma_debug/parsers/traceback_patterns.py +314 -0
- roma_debug/parsers/treesitter_parser.py +598 -0
- roma_debug/prompts.py +153 -0
- roma_debug/server.py +247 -0
- roma_debug/tracing/__init__.py +28 -0
- roma_debug/tracing/call_chain.py +278 -0
- roma_debug/tracing/context_builder.py +672 -0
- roma_debug/tracing/dependency_graph.py +298 -0
- roma_debug/tracing/error_analyzer.py +399 -0
- roma_debug/tracing/import_resolver.py +315 -0
- roma_debug/tracing/project_scanner.py +569 -0
- roma_debug/utils/__init__.py +5 -0
- roma_debug/utils/context.py +422 -0
- roma_debug-0.1.0.dist-info/METADATA +34 -0
- roma_debug-0.1.0.dist-info/RECORD +36 -0
- roma_debug-0.1.0.dist-info/WHEEL +5 -0
- roma_debug-0.1.0.dist-info/entry_points.txt +2 -0
- roma_debug-0.1.0.dist-info/licenses/LICENSE +201 -0
- roma_debug-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_context.py +208 -0
- tests/test_engine.py +296 -0
- tests/test_parsers.py +534 -0
- tests/test_project_scanner.py +275 -0
- tests/test_traceback_patterns.py +222 -0
- tests/test_tracing.py +296 -0
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
"""Tree-sitter based parser for multi-language support.
|
|
2
|
+
|
|
3
|
+
Provides semantic parsing for JavaScript, TypeScript, Go, Rust, Java,
|
|
4
|
+
and other languages using tree-sitter grammars.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from typing import Optional, List, Dict, Any
|
|
9
|
+
|
|
10
|
+
from roma_debug.core.models import Language, Symbol, Import
|
|
11
|
+
from roma_debug.parsers.base import BaseParser
|
|
12
|
+
|
|
13
|
+
# tree-sitter is an optional dependency: bind the module when it can be
# imported, otherwise leave a None placeholder so later references to the
# name do not raise NameError.
try:
    import tree_sitter
except ImportError:
    tree_sitter = None

# True only when the tree_sitter package was imported successfully.
TREE_SITTER_AVAILABLE = tree_sitter is not None
|
|
20
|
+
|
|
21
|
+
# Maps each supported Language to the name of the Python package that ships
# its tree-sitter grammar. Every package name is "tree_sitter_" + a suffix.
_LANGUAGE_MODULES: Dict[Language, str] = {
    language: "tree_sitter_" + suffix
    for language, suffix in (
        (Language.PYTHON, "python"),
        (Language.JAVASCRIPT, "javascript"),
        (Language.TYPESCRIPT, "typescript"),
        (Language.GO, "go"),
        (Language.RUST, "rust"),
        (Language.JAVA, "java"),
    )
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _get_tree_sitter_language(lang: Language) -> Optional[Any]:
    """Get the tree-sitter language object for a language.

    The grammar package registered for ``lang`` in ``_LANGUAGE_MODULES`` is
    imported on demand. Its grammar factory is looked up first as
    ``language()`` and then as ``language_<suffix>()``, where ``<suffix>`` is
    the package name without the ``tree_sitter_`` prefix. The second form is
    needed for bindings that bundle several grammars and therefore export no
    plain ``language()`` (for example ``tree_sitter_typescript``).

    Args:
        lang: The Language enum value

    Returns:
        tree-sitter Language object, or None when tree-sitter itself is
        missing, no grammar package is registered for ``lang``, the package
        cannot be imported, it exposes no known factory, or building the
        Language object fails.
    """
    if not TREE_SITTER_AVAILABLE:
        return None

    module_name = _LANGUAGE_MODULES.get(lang)
    if not module_name:
        return None

    try:
        module = __import__(module_name)

        factory = getattr(module, "language", None)
        if factory is None:
            # Multi-grammar bindings name their factories language_<name>().
            suffix = module_name.rpartition("tree_sitter_")[2]
            factory = getattr(module, "language_" + suffix, None)
        if factory is None:
            return None

        return tree_sitter.Language(factory())
    except Exception:
        # Deliberately best-effort: a missing or incompatible grammar package
        # (ImportError included) means "language not available", not a crash.
        return None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Per-language tree-sitter node types that are recorded as function or
# method symbols.
FUNCTION_TYPES: Dict[Language, List[str]] = {
    Language.PYTHON: [
        "function_definition",
        "async_function_definition",
    ],
    Language.JAVASCRIPT: [
        "function_declaration",
        "function_expression",
        "arrow_function",
        "method_definition",
    ],
    Language.TYPESCRIPT: [
        "function_declaration",
        "function_expression",
        "arrow_function",
        "method_definition",
        "method_signature",
    ],
    Language.GO: [
        "function_declaration",
        "method_declaration",
    ],
    Language.RUST: [
        "function_item",
        "impl_item",
    ],
    Language.JAVA: [
        "method_declaration",
        "constructor_declaration",
    ],
}

# Per-language tree-sitter node types that are recorded as class-like
# symbols (classes, interfaces, structs, enums).
CLASS_TYPES: Dict[Language, List[str]] = {
    Language.PYTHON: [
        "class_definition",
    ],
    Language.JAVASCRIPT: [
        "class_declaration",
        "class",
    ],
    Language.TYPESCRIPT: [
        "class_declaration",
        "interface_declaration",
    ],
    Language.GO: [
        "type_declaration",  # for struct types
    ],
    Language.RUST: [
        "struct_item",
        "enum_item",
        "impl_item",
    ],
    Language.JAVA: [
        "class_declaration",
        "interface_declaration",
        "enum_declaration",
    ],
}

# Per-language tree-sitter node types that are treated as import statements.
IMPORT_TYPES: Dict[Language, List[str]] = {
    Language.PYTHON: [
        "import_statement",
        "import_from_statement",
    ],
    Language.JAVASCRIPT: [
        "import_statement",
        "import_declaration",
    ],
    Language.TYPESCRIPT: [
        "import_statement",
        "import_declaration",
    ],
    Language.GO: [
        "import_declaration",
        "import_spec",
    ],
    Language.RUST: [
        "use_declaration",
    ],
    Language.JAVA: [
        "import_declaration",
    ],
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TreeSitterParser(BaseParser):
    """Multi-language parser using tree-sitter.

    Supports JavaScript, TypeScript, Go, Rust, Java, and more.
    Falls back gracefully when tree-sitter is not installed: ``parse``
    simply returns False and no symbols or imports are reported.
    """

    def __init__(self, language: Language = Language.UNKNOWN):
        """Initialize the tree-sitter parser.

        The underlying tree-sitter parser object is created lazily, on the
        first ``parse`` call or when the ``language`` property is assigned.

        Args:
            language: The language to parse (can be set later)
        """
        super().__init__()
        self._lang = language
        self._tree: Optional[Any] = None
        self._ts_language: Optional[Any] = None
        self._parser: Optional[Any] = None
        self._symbols: List[Symbol] = []
        self._imports: List[Import] = []
        # UTF-8 encoding of the source handed to tree-sitter. Node offsets
        # (start_byte / end_byte) index into this buffer, not into the str.
        self._source_bytes: Optional[bytes] = None

    @property
    def language(self) -> Language:
        """Return the language this parser handles."""
        return self._lang

    @language.setter
    def language(self, lang: Language):
        """Set the language and re-initialize the parser if it changed."""
        if self._lang != lang:
            self._lang = lang
            self._init_parser()

    @classmethod
    def is_available(cls) -> bool:
        """Check if tree-sitter is available."""
        return TREE_SITTER_AVAILABLE

    @classmethod
    def supported_languages(cls) -> List[Language]:
        """Get list of languages with available tree-sitter support.

        Returns:
            The languages from ``_LANGUAGE_MODULES`` whose grammar could
            actually be loaded; empty when tree-sitter is not installed.
        """
        if not TREE_SITTER_AVAILABLE:
            return []

        return [
            lang
            for lang in _LANGUAGE_MODULES
            if _get_tree_sitter_language(lang) is not None
        ]

    def _init_parser(self):
        """Initialize the tree-sitter parser for the current language.

        Any previously built parser is discarded first, so that switching to
        a language without an available grammar leaves ``_parser`` as None
        instead of silently keeping the grammar of the previous language.
        """
        self._ts_language = None
        self._parser = None

        if not TREE_SITTER_AVAILABLE:
            return

        self._ts_language = _get_tree_sitter_language(self._lang)
        if self._ts_language is not None:
            self._parser = tree_sitter.Parser(self._ts_language)

    def parse(self, source: str, filepath: str = "") -> bool:
        """Parse source code using tree-sitter.

        When no language has been set, it is detected from the extension of
        ``filepath``; the detected language is kept for later calls.

        Args:
            source: The source code to parse
            filepath: Optional file path for context

        Returns:
            True if parsing and symbol/import extraction succeeded. False if
            no parser is available for the language or any step failed; in
            that case no tree, symbols or imports are retained.
        """
        self.reset()
        self._source = source
        self._filepath = filepath
        self._lines = source.splitlines()

        if self._lang == Language.UNKNOWN and filepath:
            # Auto-detect language from filepath if not set
            self._lang = Language.from_extension(os.path.splitext(filepath)[1])
            self._init_parser()
        elif self._parser is None:
            self._init_parser()

        if self._parser is None:
            return False

        try:
            encoded = source.encode('utf-8')
            self._source_bytes = encoded
            self._tree = self._parser.parse(encoded)
            self._extract_symbols()
            self._extract_imports_internal()
        except Exception:
            # Best-effort contract: report failure instead of raising, and do
            # not leave a half-populated result behind.
            self._tree = None
            self._symbols = []
            self._imports = []
            self._parsed = False
            return False

        # Only flag success once extraction has completed as well.
        self._parsed = True
        return True

    def reset(self):
        """Reset parser state (tree, symbols, imports and cached bytes)."""
        super().reset()
        self._tree = None
        self._symbols = []
        self._imports = []
        self._source_bytes = None

    def _get_node_text(self, node) -> str:
        """Get the text content of a tree-sitter node.

        tree-sitter parsed the UTF-8 encoding of the source, so the node's
        ``start_byte`` / ``end_byte`` are byte offsets. The slice is therefore
        taken from the encoded bytes and decoded; slicing the ``str`` directly
        would return shifted text once the source contains any non-ASCII
        character.

        Args:
            node: Object exposing ``start_byte`` and ``end_byte``

        Returns:
            The node's source text, or "" when no source is loaded.
        """
        if self._source is None:
            return ""

        data = getattr(self, "_source_bytes", None)
        if data is None:
            # Source was assigned without going through parse(); encode now.
            data = self._source.encode('utf-8')

        return data[node.start_byte:node.end_byte].decode('utf-8', errors='replace')

    def _get_name_from_node(self, node) -> Optional[str]:
        """Extract the name identifier from a definition node.

        Lookup order:
          1. the grammar's ``name`` field, which is checked first because a
             plain child scan can hit an unrelated identifier that precedes
             the name (e.g. the return type of a Java method);
          2. the first direct child that is an identifier, property
             identifier or type identifier;
          3. the ``identifier`` / ``property_name`` fields;
          4. the first direct child whose type contains "identifier".

        Returns:
            The name text, or None if no candidate was found.
        """
        by_field = getattr(node, 'child_by_field_name', None)

        if by_field is not None:
            name_node = by_field("name")
            if name_node is not None:
                return self._get_node_text(name_node)

        for child in node.children:
            if child.type in ("identifier", "property_identifier", "type_identifier"):
                return self._get_node_text(child)

        if by_field is not None:
            for field in ("identifier", "property_name"):
                name_node = by_field(field)
                if name_node is not None:
                    return self._get_node_text(name_node)

        # Fallback: first identifier-like child
        for child in node.children:
            if "identifier" in child.type:
                return self._get_node_text(child)

        return None

    def _extract_symbols(self):
        """Extract all function and class symbols from the parse tree.

        Walks the tree in pre-order with an explicit stack (so deeply nested
        trees cannot exhaust Python's recursion limit) and appends a Symbol
        for every named node listed in FUNCTION_TYPES / CLASS_TYPES for the
        current language. A function found below a "class" symbol is
        recorded with kind "method".

        NOTE(review): a node type present in both tables (Rust "impl_item")
        is classified by the function table first - confirm this is intended.
        """
        if self._tree is None:
            return

        function_types = FUNCTION_TYPES.get(self._lang, [])
        class_types = CLASS_TYPES.get(self._lang, [])

        pending = [(self._tree.root_node, None)]
        while pending:
            node, parent_symbol = pending.pop()

            kind = None
            if node.type in function_types:
                kind = "function"
                if parent_symbol is not None and parent_symbol.kind == "class":
                    kind = "method"
            elif node.type in class_types:
                kind = "class"

            symbol = None
            if kind:
                name = self._get_name_from_node(node)
                if name:
                    symbol = Symbol(
                        name=name,
                        kind=kind,
                        # tree-sitter rows are 0-based; symbols use 1-based lines
                        start_line=node.start_point[0] + 1,
                        end_line=node.end_point[0] + 1,
                        start_col=node.start_point[1],
                        end_col=node.end_point[1],
                        parent=parent_symbol,
                    )
                    self._symbols.append(symbol)

            # Children inherit the nearest enclosing symbol as their parent.
            new_parent = symbol if symbol is not None else parent_symbol
            # Push in reverse so children are popped in source order.
            for child in reversed(node.children):
                pending.append((child, new_parent))

    def _extract_imports_internal(self):
        """Extract import statements from the parse tree.

        Every node whose type is listed in IMPORT_TYPES for the current
        language is passed to ``_parse_import_node``; non-None results are
        collected in source order. Children of import nodes are visited too.
        """
        if self._tree is None:
            return

        import_types = IMPORT_TYPES.get(self._lang, [])

        pending = [self._tree.root_node]
        while pending:
            node = pending.pop()

            if node.type in import_types:
                imp = self._parse_import_node(node)
                if imp:
                    self._imports.append(imp)

            # Reverse push keeps pre-order, left-to-right traversal.
            pending.extend(reversed(node.children))

    def _parse_import_node(self, node) -> Optional[Import]:
        """Parse an import node into an Import object.

        Dispatches to the language-specific parser; for any other language
        the raw node text is used as the module name.
        """
        import_text = self._get_node_text(node)
        line_number = node.start_point[0] + 1

        if self._lang == Language.PYTHON:
            return self._parse_python_import(node, import_text, line_number)
        elif self._lang in (Language.JAVASCRIPT, Language.TYPESCRIPT):
            return self._parse_js_import(node, import_text, line_number)
        elif self._lang == Language.GO:
            return self._parse_go_import(node, import_text, line_number)
        elif self._lang == Language.RUST:
            return self._parse_rust_import(node, import_text, line_number)
        elif self._lang == Language.JAVA:
            return self._parse_java_import(node, import_text, line_number)

        # Generic fallback
        return Import(
            module_name=import_text,
            line_number=line_number,
            language=self._lang,
        )

    def _parse_python_import(self, node, text: str, line: int) -> Optional[Import]:
        """Parse Python import statement.

        Handles ``import x`` / ``import x as y`` and ``from x import y``
        (including relative and wildcard forms). For a from-import, names
        before the ``import`` keyword form the module and names after it are
        the imported names; the relative level is the number of leading dots.

        Args:
            node: ``import_statement`` or ``import_from_statement`` node
            text: Source text of the node (unused)
            line: 1-based line number of the statement

        Returns:
            An Import, or None if neither a module nor a name was found.
        """
        module_name = ""
        imported_names = []
        is_relative = False
        relative_level = 0

        if node.type == "import_statement":
            # import x, import x as y (with several modules the last one wins)
            for child in node.children:
                if child.type == "dotted_name":
                    module_name = self._get_node_text(child)
                elif child.type == "aliased_import":
                    for subchild in child.children:
                        if subchild.type == "dotted_name":
                            module_name = self._get_node_text(subchild)
                            break

        elif node.type == "import_from_statement":
            # The module and the imported names can both be dotted_name
            # nodes; what tells them apart is their position relative to the
            # "import" keyword token.
            seen_import_keyword = False

            for child in node.children:
                child_type = child.type

                if child_type == "import":
                    seen_import_keyword = True

                elif not seen_import_keyword:
                    if child_type == "dotted_name":
                        module_name = self._get_node_text(child)
                    elif child_type == "relative_import":
                        is_relative = True
                        dots = self._get_node_text(child)
                        # Count only the leading dots; dots inside the module
                        # path ("..pkg.mod") are not part of the level.
                        relative_level = len(dots) - len(dots.lstrip('.'))
                        for subchild in child.children:
                            if subchild.type == "import_prefix":
                                relative_level = self._get_node_text(subchild).count('.')
                            elif subchild.type == "dotted_name":
                                # Module name after the dots
                                module_name = self._get_node_text(subchild)

                elif child_type in ("dotted_name", "identifier", "wildcard_import"):
                    imported_names.append(self._get_node_text(child))

                elif child_type == "aliased_import":
                    # "y as z": record the imported name (y), not the alias.
                    for subchild in child.children:
                        if subchild.type in ("dotted_name", "identifier"):
                            imported_names.append(self._get_node_text(subchild))
                            break

        if not module_name and not imported_names:
            return None

        return Import(
            module_name=module_name,
            imported_names=imported_names,
            is_relative=is_relative,
            relative_level=relative_level,
            line_number=line,
            language=Language.PYTHON,
        )

    def _parse_js_import(self, node, text: str, line: int) -> Optional[Import]:
        """Parse JavaScript/TypeScript import statement.

        The module path comes from the string child. A default import or a
        namespace import (``* as X``) is stored as the alias; names inside
        ``{ ... }`` are stored as imported names (the original name, not the
        local ``as`` alias).

        Returns:
            An Import, or None when the statement has no module string.
        """
        module_name = ""
        imported_names = []
        alias = None

        for child in node.children:
            if child.type == "string":
                # The module path is in a string; drop the surrounding quotes
                module_name = self._get_node_text(child).strip("'\"")
            elif child.type == "import_clause":
                for subchild in child.children:
                    if subchild.type == "identifier":
                        # Default import
                        alias = self._get_node_text(subchild)
                    elif subchild.type == "named_imports":
                        # Named imports: { a, b, c }
                        for imp_spec in subchild.children:
                            if imp_spec.type != "import_specifier":
                                continue
                            for name_node in imp_spec.children:
                                if name_node.type == "identifier":
                                    imported_names.append(self._get_node_text(name_node))
                                    break
                    elif subchild.type == "namespace_import":
                        # import * as X
                        for name_node in subchild.children:
                            if name_node.type == "identifier":
                                alias = self._get_node_text(name_node)

        if not module_name:
            return None

        is_relative = module_name.startswith(('.', '/'))

        return Import(
            module_name=module_name,
            alias=alias,
            imported_names=imported_names,
            is_relative=is_relative,
            line_number=line,
            language=self._lang,
        )

    def _parse_go_import(self, node, text: str, line: int) -> Optional[Import]:
        """Parse Go import statement.

        Both ``import_declaration`` and ``import_spec`` are import node types
        for Go, and the import traversal also visits the children of an
        import node. A declaration that contains specs therefore yields
        nothing here, because each of its ``import_spec`` children is parsed
        on its own; returning the first spec for the declaration as well
        would report that import twice and hide nothing else.

        Returns:
            An Import for a spec (alias is the package alias, "_" or "."),
            or None when there is no import path at this node.
        """
        module_name = ""
        alias = None
        string_types = ("interpreted_string_literal", "raw_string_literal")

        if node.type == "import_spec":
            for child in node.children:
                if child.type in string_types:
                    module_name = self._get_node_text(child).strip('"`')
                elif child.type == "package_identifier":
                    alias = self._get_node_text(child)
                elif child.type == "blank_identifier":
                    alias = "_"
                elif child.type == "dot":
                    alias = "."
        elif node.type == "import_declaration":
            for child in node.children:
                if child.type in ("import_spec", "import_spec_list"):
                    # The contained specs are handled individually.
                    return None
                if child.type in string_types:
                    module_name = self._get_node_text(child).strip('"`')

        if not module_name:
            return None

        return Import(
            module_name=module_name,
            alias=alias,
            line_number=line,
            language=Language.GO,
        )

    def _parse_rust_import(self, node, text: str, line: int) -> Optional[Import]:
        """Parse Rust use statement.

        Handles plain paths (``use a::b::C;``), bare lists and scoped lists
        (``use std::io::{Read, Write};``). The path becomes the module name
        joined with "::"; list items become imported names.

        Returns:
            An Import, or None if neither a path nor a name was recognised.
        """
        module_name = ""
        imported_names = []

        def extract_path(n) -> str:
            # Flatten a (possibly nested) scoped_identifier into "a::b::c".
            if n.type == "identifier" or n.type == "crate":
                return self._get_node_text(n)
            elif n.type == "scoped_identifier":
                parts = [
                    extract_path(child)
                    for child in n.children
                    if child.type in ("identifier", "crate", "scoped_identifier")
                ]
                return "::".join(p for p in parts if p)
            return ""

        def collect_list_names(use_list):
            # Record every identifier-like item of a {...} list.
            for item in use_list.children:
                if "identifier" in item.type:
                    imported_names.append(self._get_node_text(item))

        for child in node.children:
            if child.type == "use_list":
                # Multiple imports
                collect_list_names(child)
            elif child.type == "scoped_identifier":
                module_name = extract_path(child)
            elif child.type == "identifier":
                module_name = self._get_node_text(child)
            elif child.type == "scoped_use_list":
                # use std::io::{Read, Write}
                for subchild in child.children:
                    if subchild.type == "scoped_identifier":
                        module_name = extract_path(subchild)
                    elif subchild.type == "use_list":
                        collect_list_names(subchild)

        if not module_name and not imported_names:
            return None

        return Import(
            module_name=module_name,
            imported_names=imported_names,
            line_number=line,
            language=Language.RUST,
        )

    def _parse_java_import(self, node, text: str, line: int) -> Optional[Import]:
        """Parse Java import statement.

        The scoped identifier is flattened to a dotted path
        (``com.example.MyClass``); a trailing ``*`` is recorded as the
        imported name "*".

        Returns:
            An Import, or None if the statement has no scoped identifier.
        """
        module_name = ""
        imported_names = []

        def collect_parts(n, parts):
            # Depth-first walk that gathers identifier texts left to right.
            for c in n.children:
                if c.type == "identifier":
                    parts.append(self._get_node_text(c))
                elif c.type == "scoped_identifier":
                    collect_parts(c, parts)

        for child in node.children:
            if child.type == "scoped_identifier":
                # Build full path: com.example.MyClass
                parts = []
                collect_parts(child, parts)
                module_name = ".".join(parts)
            elif child.type == "asterisk":
                imported_names.append("*")

        if not module_name:
            return None

        return Import(
            module_name=module_name,
            imported_names=imported_names,
            line_number=line,
            language=Language.JAVA,
        )

    def find_enclosing_symbol(self, line_number: int) -> Optional[Symbol]:
        """Find the innermost symbol containing the given line.

        "Innermost" means the containing symbol that spans the fewest lines;
        on a tie the symbol extracted first wins.

        Args:
            line_number: 1-based line number

        Returns:
            The innermost Symbol containing the line, or None
        """
        candidates = [
            symbol for symbol in self._symbols if symbol.contains_line(line_number)
        ]
        return min(
            candidates,
            key=lambda symbol: symbol.end_line - symbol.start_line,
            default=None,
        )

    def extract_imports(self) -> List[Import]:
        """Return all extracted imports.

        Returns:
            List of Import objects (a shallow copy of the internal list)
        """
        return self._imports.copy()

    def find_all_symbols(self) -> List[Symbol]:
        """Return all extracted symbols.

        Returns:
            List of all Symbol objects (a shallow copy of the internal list)
        """
        return self._symbols.copy()
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def create_parser_for_language(language: Language) -> Optional[TreeSitterParser]:
    """Build a ready-to-use tree-sitter parser for ``language``.

    Args:
        language: The language to create a parser for

    Returns:
        An initialized TreeSitterParser, or None when tree-sitter is not
        installed, the language has no registered grammar module, or the
        grammar could not be loaded.
    """
    can_attempt = TREE_SITTER_AVAILABLE and language in _LANGUAGE_MODULES
    if not can_attempt:
        return None

    candidate = TreeSitterParser(language)
    candidate._init_parser()

    # Initialization leaves _parser as None when the grammar is unavailable.
    return candidate if candidate._parser is not None else None
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
# Hook the tree-sitter backed parsers into the parser registry.
def _register_treesitter_parsers():
    """Register TreeSitterParser for every language whose grammar loads.

    Does nothing when tree-sitter is not installed. Python is skipped
    because it uses the AST parser by default.
    """
    if not TREE_SITTER_AVAILABLE:
        return

    # Imported here rather than at module top (presumably to avoid an import
    # cycle with the registry module - confirm).
    from roma_debug.parsers.registry import register_parser

    for lang in TreeSitterParser.supported_languages():
        if lang == Language.PYTHON:
            continue
        # The default argument binds the current language to this factory;
        # a plain closure would see only the last value of the loop variable.
        register_parser(
            lang,
            TreeSitterParser,
            factory=lambda l=lang: TreeSitterParser(l),
        )


# Registration happens as a side effect of importing this module.
_register_treesitter_parsers()
|