roma-debug 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- roma_debug/__init__.py +3 -0
- roma_debug/config.py +79 -0
- roma_debug/core/__init__.py +5 -0
- roma_debug/core/engine.py +423 -0
- roma_debug/core/models.py +313 -0
- roma_debug/main.py +753 -0
- roma_debug/parsers/__init__.py +21 -0
- roma_debug/parsers/base.py +189 -0
- roma_debug/parsers/python_ast_parser.py +268 -0
- roma_debug/parsers/registry.py +196 -0
- roma_debug/parsers/traceback_patterns.py +314 -0
- roma_debug/parsers/treesitter_parser.py +598 -0
- roma_debug/prompts.py +153 -0
- roma_debug/server.py +247 -0
- roma_debug/tracing/__init__.py +28 -0
- roma_debug/tracing/call_chain.py +278 -0
- roma_debug/tracing/context_builder.py +672 -0
- roma_debug/tracing/dependency_graph.py +298 -0
- roma_debug/tracing/error_analyzer.py +399 -0
- roma_debug/tracing/import_resolver.py +315 -0
- roma_debug/tracing/project_scanner.py +569 -0
- roma_debug/utils/__init__.py +5 -0
- roma_debug/utils/context.py +422 -0
- roma_debug-0.1.0.dist-info/METADATA +34 -0
- roma_debug-0.1.0.dist-info/RECORD +36 -0
- roma_debug-0.1.0.dist-info/WHEEL +5 -0
- roma_debug-0.1.0.dist-info/entry_points.txt +2 -0
- roma_debug-0.1.0.dist-info/licenses/LICENSE +201 -0
- roma_debug-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_context.py +208 -0
- tests/test_engine.py +296 -0
- tests/test_parsers.py +534 -0
- tests/test_project_scanner.py +275 -0
- tests/test_traceback_patterns.py +222 -0
- tests/test_tracing.py +296 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""ROMA Debug Parsers - Multi-language source code parsing.
|
|
2
|
+
|
|
3
|
+
This package provides extensible parser support for extracting
|
|
4
|
+
semantic information from source files in multiple programming languages.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from roma_debug.parsers.base import BaseParser
|
|
8
|
+
from roma_debug.parsers.registry import (
|
|
9
|
+
get_parser,
|
|
10
|
+
detect_language,
|
|
11
|
+
register_parser,
|
|
12
|
+
ParserRegistry,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"BaseParser",
|
|
17
|
+
"get_parser",
|
|
18
|
+
"detect_language",
|
|
19
|
+
"register_parser",
|
|
20
|
+
"ParserRegistry",
|
|
21
|
+
]
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Abstract base class for language parsers.
|
|
2
|
+
|
|
3
|
+
All language-specific parsers must implement this interface.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import Optional, List
|
|
8
|
+
|
|
9
|
+
from roma_debug.core.models import Language, Symbol, Import
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BaseParser(ABC):
    """Abstract base class for language parsers.

    Each language parser must implement methods for:
    - Parsing source code
    - Finding symbols (functions, classes) at specific lines
    - Extracting import statements

    The concrete helpers defined here (line access, snippet formatting,
    symbol code extraction) only read ``self._lines``; a subclass's
    ``parse`` is responsible for populating it.
    """

    def __init__(self):
        """Initialize the parser with empty state."""
        self._source: Optional[str] = None
        self._filepath: Optional[str] = None
        self._lines: List[str] = []
        self._parsed: bool = False

    @property
    @abstractmethod
    def language(self) -> Language:
        """Return the language this parser handles."""
        ...

    @property
    def source(self) -> Optional[str]:
        """Return the parsed source code."""
        return self._source

    @property
    def filepath(self) -> Optional[str]:
        """Return the file path being parsed."""
        return self._filepath

    @property
    def lines(self) -> List[str]:
        """Return source split into lines."""
        return self._lines

    @property
    def is_parsed(self) -> bool:
        """Check if a file has been successfully parsed."""
        return self._parsed

    @abstractmethod
    def parse(self, source: str, filepath: str = "") -> bool:
        """Parse source code.

        Args:
            source: The source code to parse
            filepath: Optional file path for context

        Returns:
            True if parsing succeeded, False otherwise
        """
        ...

    @abstractmethod
    def find_enclosing_symbol(self, line_number: int) -> Optional[Symbol]:
        """Find the innermost symbol containing the given line.

        Args:
            line_number: 1-based line number

        Returns:
            Symbol if found, None otherwise
        """
        ...

    @abstractmethod
    def extract_imports(self) -> List[Import]:
        """Extract all import statements from the parsed source.

        Returns:
            List of Import objects
        """
        ...

    def get_symbol_at_line(self, line_number: int) -> Optional[Symbol]:
        """Alias for find_enclosing_symbol for backward compatibility."""
        return self.find_enclosing_symbol(line_number)

    def get_line_content(self, line_number: int) -> Optional[str]:
        """Get the content of a specific line.

        Args:
            line_number: 1-based line number

        Returns:
            Line content or None if out of range
        """
        idx = line_number - 1
        if 0 <= idx < len(self._lines):
            return self._lines[idx]
        return None

    def get_line_range(self, start: int, end: int) -> List[str]:
        """Get a range of lines.

        Both bounds are clamped to the available lines. An empty or
        inverted range (including a zero or negative ``end``) yields an
        empty list.

        Args:
            start: 1-based start line (inclusive)
            end: 1-based end line (inclusive)

        Returns:
            List of line contents
        """
        if not self._lines:
            return []
        start_idx = max(0, start - 1)
        end_idx = min(len(self._lines), end)
        # Guard explicitly: a negative end_idx would otherwise be treated
        # by the slice as "count from the end" and return unrelated lines.
        if end_idx <= start_idx:
            return []
        return self._lines[start_idx:end_idx]

    def format_snippet(
        self,
        start_line: int,
        end_line: int,
        highlight_line: Optional[int] = None,
        with_line_numbers: bool = True,
    ) -> str:
        """Format a code snippet with optional line numbers and highlighting.

        Args:
            start_line: 1-based start line
            end_line: 1-based end line
            highlight_line: Optional line to highlight with >>
            with_line_numbers: Whether to include line numbers

        Returns:
            Formatted snippet string
        """
        # get_line_range clamps the start to line 1; clamp here as well so
        # the printed numbers refer to the lines that are actually shown.
        first_line = max(1, start_line)
        lines = self.get_line_range(first_line, end_line)

        highlight_marker = " >> " if with_line_numbers else ">> "
        # Same width as the highlight marker so the columns stay aligned.
        plain_marker = " " * len(highlight_marker)

        result = []
        for line_num, line in enumerate(lines, start=first_line):
            marker = highlight_marker if line_num == highlight_line else plain_marker
            if with_line_numbers:
                result.append(f"{marker}{line_num:4d} | {line}")
            else:
                result.append(f"{marker}{line}")

        return "\n".join(result)

    def extract_symbol_code(
        self,
        symbol: Symbol,
        include_decorators: bool = True,
        context_before: int = 0,
        context_after: int = 0,
    ) -> str:
        """Extract the full code for a symbol.

        Args:
            symbol: The symbol to extract
            include_decorators: Whether to include decorator lines
            context_before: Extra lines before the symbol
            context_after: Extra lines after the symbol

        Returns:
            The symbol's source code
        """
        start = symbol.start_line
        if include_decorators and symbol.decorators:
            # Heuristic: assumes one source line per recorded decorator,
            # located directly above the definition line.
            start = max(1, start - len(symbol.decorators))

        start = max(1, start - context_before)
        end = min(len(self._lines), symbol.end_line + context_after)

        return "\n".join(self.get_line_range(start, end))

    def reset(self):
        """Reset parser state."""
        self._source = None
        self._filepath = None
        self._lines = []
        self._parsed = False
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""Python AST-based parser implementation.
|
|
2
|
+
|
|
3
|
+
Uses Python's built-in ast module for parsing Python source code.
|
|
4
|
+
This is the legacy fallback parser that provides reliable Python parsing.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import ast
|
|
8
|
+
from typing import Optional, List
|
|
9
|
+
|
|
10
|
+
from roma_debug.core.models import Language, Symbol, Import
|
|
11
|
+
from roma_debug.parsers.base import BaseParser
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PythonAstParser(BaseParser):
    """Python parser using the built-in ast module.

    This parser provides reliable Python parsing using Python's own
    AST module. It's used as the primary Python parser and as a
    fallback when tree-sitter is unavailable.
    """

    def __init__(self):
        """Initialize the Python AST parser."""
        super().__init__()
        self._tree: Optional[ast.AST] = None
        self._symbols: List[Symbol] = []
        self._imports: List[Import] = []

    @property
    def language(self) -> Language:
        """Return Python as the language."""
        return Language.PYTHON

    def parse(self, source: str, filepath: str = "") -> bool:
        """Parse Python source code using ast.

        On failure the raw source and its lines stay available (so line
        lookups still work), but no tree, symbols or imports are kept and
        ``is_parsed`` remains False.

        Args:
            source: Python source code
            filepath: Optional file path

        Returns:
            True if parsing succeeded
        """
        self.reset()
        self._source = source
        self._filepath = filepath
        self._lines = source.splitlines()

        try:
            self._tree = ast.parse(source, filename=filepath or "<unknown>")
            self._extract_symbols()
            self._extract_imports_internal()
        except (SyntaxError, ValueError, RecursionError):
            # ValueError: source contains null bytes.
            # RecursionError: pathologically deep nesting, either inside
            # ast.parse or in the recursive symbol walk below.
            self._tree = None
            self._symbols = []
            self._imports = []
            return False

        self._parsed = True
        return True

    def reset(self):
        """Reset parser state."""
        super().reset()
        self._tree = None
        self._symbols = []
        self._imports = []

    @staticmethod
    def _decorator_name(dec: ast.expr) -> Optional[str]:
        """Return a display name for a decorator expression, if recognised.

        Handles bare names (``@cache``), attribute access
        (``@functools.cache``) and calls of either (``@lru_cache(8)``,
        ``@app.route("/")``). For a call on an attribute only the final
        attribute name is returned. Any other expression yields None.
        """
        target = dec.func if isinstance(dec, ast.Call) else dec
        if isinstance(target, ast.Name):
            return target.id
        if isinstance(target, ast.Attribute):
            if isinstance(dec, ast.Call):
                return target.attr
            # ast.unparse only exists on Python 3.9+.
            return ast.unparse(target) if hasattr(ast, 'unparse') else str(target.attr)
        return None

    def _extract_symbols(self):
        """Extract all function and class symbols from the AST."""
        if self._tree is None:
            return

        definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

        def visit_node(node: ast.AST, parent: Optional[Symbol] = None):
            symbol = None

            if isinstance(node, definition_types):
                if isinstance(node, ast.ClassDef):
                    kind = "class"
                elif parent is not None and parent.kind == "class":
                    # Any function defined directly in a class body,
                    # sync or async, is reported as a method.
                    kind = "method"
                elif isinstance(node, ast.AsyncFunctionDef):
                    kind = "async_function"
                else:
                    kind = "function"

                # Functions and classes share one decorator routine so a
                # call-style class decorator is recorded like a function's.
                names = (self._decorator_name(dec) for dec in node.decorator_list)
                decorators = [name for name in names if name is not None]

                symbol = Symbol(
                    name=node.name,
                    kind=kind,
                    start_line=node.lineno,
                    end_line=node.end_lineno or node.lineno,
                    start_col=node.col_offset,
                    end_col=node.end_col_offset or 0,
                    parent=parent,
                    decorators=decorators,
                    docstring=ast.get_docstring(node),
                )
                self._symbols.append(symbol)

            # Children of a definition get it as parent; children of any
            # other node inherit the nearest enclosing definition.
            new_parent = symbol if symbol is not None else parent
            for child in ast.iter_child_nodes(node):
                visit_node(child, new_parent)

        visit_node(self._tree)

    def _extract_imports_internal(self):
        """Extract import statements from the AST."""
        if self._tree is None:
            return

        for node in ast.walk(self._tree):
            if isinstance(node, ast.Import):
                # "import a, b as c" yields one Import record per module.
                for alias in node.names:
                    self._imports.append(Import(
                        module_name=alias.name,
                        alias=alias.asname,
                        imported_names=[],
                        is_relative=False,
                        relative_level=0,
                        line_number=node.lineno,
                        language=Language.PYTHON,
                    ))

            elif isinstance(node, ast.ImportFrom):
                # node.module is None for "from . import x".
                self._imports.append(Import(
                    module_name=node.module or "",
                    alias=None,
                    imported_names=[alias.name for alias in node.names],
                    is_relative=node.level > 0,
                    relative_level=node.level,
                    line_number=node.lineno,
                    language=Language.PYTHON,
                ))

    def find_enclosing_symbol(self, line_number: int) -> Optional[Symbol]:
        """Find the innermost symbol containing the given line.

        "Innermost" is the containing symbol spanning the fewest lines;
        on a tie the symbol extracted first wins.

        Args:
            line_number: 1-based line number

        Returns:
            The innermost Symbol containing the line, or None
        """
        candidates = (s for s in self._symbols if s.contains_line(line_number))
        return min(
            candidates,
            key=lambda s: s.end_line - s.start_line,
            default=None,
        )

    def extract_imports(self) -> List[Import]:
        """Return all extracted imports.

        Returns:
            List of Import objects
        """
        return self._imports.copy()

    def find_all_symbols(self) -> List[Symbol]:
        """Return all extracted symbols.

        Returns:
            List of all Symbol objects
        """
        return self._symbols.copy()

    def find_symbols_by_name(self, name: str) -> List[Symbol]:
        """Find all symbols with the given name.

        Args:
            name: Symbol name to search for

        Returns:
            List of matching Symbol objects
        """
        return [s for s in self._symbols if s.name == name]

    def find_symbols_by_kind(self, kind: str) -> List[Symbol]:
        """Find all symbols of a given kind.

        Args:
            kind: Symbol kind ('function', 'class', 'method', etc.)

        Returns:
            List of matching Symbol objects
        """
        return [s for s in self._symbols if s.kind == kind]

    def get_function_calls_in_symbol(self, symbol: Symbol) -> List[str]:
        """Extract function/method calls within a symbol.

        Calls inside nested functions or classes of the symbol are
        included, since the whole subtree is walked.

        Args:
            symbol: The symbol to analyze

        Returns:
            List of called function/method names
        """
        if self._tree is None:
            return []

        def find_symbol_node(node: ast.AST) -> Optional[ast.AST]:
            """Find the AST node for the given symbol."""
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                # Name plus definition line identifies the node.
                if node.name == symbol.name and node.lineno == symbol.start_line:
                    return node
            for child in ast.iter_child_nodes(node):
                result = find_symbol_node(child)
                if result is not None:
                    return result
            return None

        symbol_node = find_symbol_node(self._tree)
        if symbol_node is None:
            return []

        calls = []
        for node in ast.walk(symbol_node):
            if isinstance(node, ast.Call):
                if isinstance(node.func, ast.Name):
                    calls.append(node.func.id)
                elif isinstance(node.func, ast.Attribute):
                    # For method calls like obj.method()
                    calls.append(node.func.attr)

        return calls
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Parser registry for language detection and parser dispatch.
|
|
2
|
+
|
|
3
|
+
Provides centralized parser management and language detection from file extensions.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
from typing import Dict, Optional, Type, Callable
|
|
8
|
+
|
|
9
|
+
from roma_debug.core.models import Language
|
|
10
|
+
from roma_debug.parsers.base import BaseParser
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ParserRegistry:
    """Registry for language parsers.

    Manages parser registration and provides parser lookup by language
    or file extension. One parser instance per language is cached and
    reused unless the caller asks for a fresh one.
    """

    def __init__(self):
        """Initialize the registry."""
        self._parsers: Dict[Language, Type[BaseParser]] = {}
        self._parser_factories: Dict[Language, Callable[[], BaseParser]] = {}
        self._instances: Dict[Language, BaseParser] = {}

    def register(
        self,
        language: Language,
        parser_class: Type[BaseParser],
        factory: Optional[Callable[[], BaseParser]] = None,
    ):
        """Register a parser for a language.

        Registering a language again replaces the previous registration
        completely, including its factory and any cached instance.

        Args:
            language: The language this parser handles
            parser_class: The parser class
            factory: Optional factory function to create parser instances
        """
        self._parsers[language] = parser_class
        if factory is not None:
            self._parser_factories[language] = factory
        else:
            # Without this, a factory from an earlier registration would
            # keep producing the old parser type.
            self._parser_factories.pop(language, None)
        # Drop the cached instance so the next lookup builds one from the
        # new registration instead of returning the stale parser.
        self._instances.pop(language, None)

    def get_parser(
        self,
        language: Language,
        create_new: bool = False,
    ) -> Optional[BaseParser]:
        """Get a parser for the given language.

        Args:
            language: The language to get a parser for
            create_new: If True, create a new instance instead of reusing

        Returns:
            Parser instance or None if no parser registered
        """
        if language not in self._parsers:
            return None

        if not create_new and language in self._instances:
            return self._instances[language]

        # Prefer the registered factory; fall back to calling the class.
        build = self._parser_factories.get(language, self._parsers[language])
        parser = build()
        if not create_new:
            # Only shared instances are cached; explicitly requested
            # fresh ones stay private to the caller.
            self._instances[language] = parser
        return parser

    def get_parser_for_file(
        self,
        filepath: str,
        create_new: bool = False,
    ) -> Optional[BaseParser]:
        """Get a parser based on file extension.

        Args:
            filepath: Path to the file
            create_new: If True, create a new instance

        Returns:
            Parser instance or None if language not supported
        """
        language = detect_language(filepath)
        return self.get_parser(language, create_new)

    def supports_language(self, language: Language) -> bool:
        """Check if a language is supported.

        Args:
            language: The language to check

        Returns:
            True if a parser is registered for this language
        """
        return language in self._parsers

    def supports_file(self, filepath: str) -> bool:
        """Check if a file type is supported.

        Args:
            filepath: Path to the file

        Returns:
            True if the file's language has a registered parser
        """
        language = detect_language(filepath)
        return self.supports_language(language)

    @property
    def supported_languages(self) -> list:
        """Get list of supported languages."""
        return list(self._parsers.keys())

    def clear_instances(self):
        """Clear cached parser instances."""
        self._instances.clear()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# Module-wide registry instance used by the helper functions in this module.
_registry: ParserRegistry = ParserRegistry()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def detect_language(filepath: str) -> Language:
    """Map a file path to a Language using its extension.

    Args:
        filepath: Path to the source file

    Returns:
        Whatever ``Language.from_extension`` returns for the extension,
        which includes the leading dot and is empty when the path has none
    """
    extension = os.path.splitext(filepath)[1]
    return Language.from_extension(extension)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def get_parser(
    filepath_or_language,
    create_new: bool = False,
) -> Optional[BaseParser]:
    """Look up a parser in the global registry.

    Args:
        filepath_or_language: Either a Language enum member or a file
            path whose extension selects the language
        create_new: If True, create a new parser instance

    Returns:
        Parser instance or None if not supported
    """
    # Anything that is not a Language member is treated as a file path.
    if not isinstance(filepath_or_language, Language):
        return _registry.get_parser_for_file(filepath_or_language, create_new)
    return _registry.get_parser(filepath_or_language, create_new)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def register_parser(
    language: Language,
    parser_class: Type[BaseParser],
    factory: Optional[Callable[[], BaseParser]] = None,
):
    """Add a parser to the global registry.

    Thin wrapper that forwards its arguments to the global registry's
    ``register`` method.

    Args:
        language: The language this parser handles
        parser_class: The parser class
        factory: Optional factory function
    """
    _registry.register(
        language=language,
        parser_class=parser_class,
        factory=factory,
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def get_registry() -> ParserRegistry:
    """Expose the global parser registry.

    Returns:
        The module's single ParserRegistry instance; the same object is
        returned on every call
    """
    return _registry
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# Built-in parser registration
def _register_builtin_parsers():
    """Register the parsers that ship with the package.

    The ast-based Python parser is always registered. The tree-sitter
    module is then imported for its registration side effect; if that
    import fails, only Python is supported.
    """
    from roma_debug.parsers.python_ast_parser import PythonAstParser

    register_parser(Language.PYTHON, PythonAstParser)

    # Importing the tree-sitter parser module triggers its own
    # auto-registration. An ImportError is tolerated so the package
    # degrades gracefully when tree-sitter is not installed.
    try:
        import roma_debug.parsers.treesitter_parser  # noqa: F401
    except ImportError:
        pass  # tree-sitter not available, only Python will be supported


# Runs once at import time so the global registry is populated before use.
_register_builtin_parsers()
|