roma-debug 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ """ROMA Debug Parsers - Multi-language source code parsing.
2
+
3
+ This package provides extensible parser support for extracting
4
+ semantic information from source files in multiple programming languages.
5
+ """
6
+
7
+ from roma_debug.parsers.base import BaseParser
8
+ from roma_debug.parsers.registry import (
9
+ get_parser,
10
+ detect_language,
11
+ register_parser,
12
+ ParserRegistry,
13
+ )
14
+
15
+ __all__ = [
16
+ "BaseParser",
17
+ "get_parser",
18
+ "detect_language",
19
+ "register_parser",
20
+ "ParserRegistry",
21
+ ]
@@ -0,0 +1,189 @@
1
+ """Abstract base class for language parsers.
2
+
3
+ All language-specific parsers must implement this interface.
4
+ """
5
+
6
+ from abc import ABC, abstractmethod
7
+ from typing import Optional, List
8
+
9
+ from roma_debug.core.models import Language, Symbol, Import
10
+
11
+
12
class BaseParser(ABC):
    """Abstract base class for language parsers.

    Concrete parsers must implement:
    - ``language``: the language handled by the parser
    - ``parse``: parsing of a source string
    - ``find_enclosing_symbol``: symbol lookup for a line
    - ``extract_imports``: import extraction

    The base class stores the source text and its lines and provides
    line-oriented helpers on top of them. Every line number accepted by
    these helpers is 1-based.
    """

    def __init__(self):
        """Initialize the parser with empty state."""
        self._source: Optional[str] = None
        self._filepath: Optional[str] = None
        self._lines: List[str] = []
        self._parsed: bool = False

    @property
    @abstractmethod
    def language(self) -> Language:
        """Return the language this parser handles."""
        ...

    @property
    def source(self) -> Optional[str]:
        """Return the stored source code, or None if nothing is loaded."""
        return self._source

    @property
    def filepath(self) -> Optional[str]:
        """Return the stored file path, or None if nothing is loaded."""
        return self._filepath

    @property
    def lines(self) -> List[str]:
        """Return the stored source split into lines."""
        return self._lines

    @property
    def is_parsed(self) -> bool:
        """Return the parsed flag maintained by the concrete parser."""
        return self._parsed

    @abstractmethod
    def parse(self, source: str, filepath: str = "") -> bool:
        """Parse source code.

        Args:
            source: The source code to parse
            filepath: Optional file path for context

        Returns:
            True if parsing succeeded, False otherwise
        """
        ...

    @abstractmethod
    def find_enclosing_symbol(self, line_number: int) -> Optional[Symbol]:
        """Find the innermost symbol containing the given line.

        Args:
            line_number: 1-based line number

        Returns:
            Symbol if found, None otherwise
        """
        ...

    @abstractmethod
    def extract_imports(self) -> List[Import]:
        """Extract all import statements from the parsed source.

        Returns:
            List of Import objects
        """
        ...

    def get_symbol_at_line(self, line_number: int) -> Optional[Symbol]:
        """Alias for find_enclosing_symbol for backward compatibility."""
        return self.find_enclosing_symbol(line_number)

    def get_line_content(self, line_number: int) -> Optional[str]:
        """Get the content of a specific line.

        Args:
            line_number: 1-based line number

        Returns:
            Line content, or None if the line is out of range or no
            lines are stored
        """
        idx = line_number - 1
        if 0 <= idx < len(self._lines):
            return self._lines[idx]
        return None

    def get_line_range(self, start: int, end: int) -> List[str]:
        """Get a range of lines.

        Bounds outside the stored lines are clamped, so the result may be
        shorter than requested. An empty list is returned when the range
        is empty or inverted.

        Args:
            start: 1-based start line (inclusive)
            end: 1-based end line (inclusive)

        Returns:
            List of line contents
        """
        if not self._lines:
            return []
        start_idx = max(0, start - 1)
        # Clamp the end from below as well: a zero or negative ``end`` must
        # not turn into a from-the-end slice index and return unrelated lines.
        end_idx = max(start_idx, min(len(self._lines), end))
        return self._lines[start_idx:end_idx]

    def format_snippet(
        self,
        start_line: int,
        end_line: int,
        highlight_line: Optional[int] = None,
        with_line_numbers: bool = True,
    ) -> str:
        """Format a code snippet with optional line numbers and highlighting.

        Args:
            start_line: 1-based start line (values below 1 are treated as 1)
            end_line: 1-based end line
            highlight_line: Optional line to highlight with >>
            with_line_numbers: Whether to include line numbers

        Returns:
            Formatted snippet string, one output row per source line
        """
        # get_line_range clamps the start to the first line; numbering must
        # start from the same clamped value or every row is mislabelled.
        first_line = max(1, start_line)
        lines = self.get_line_range(first_line, end_line)

        # Both markers of a pair have the same width so the gutter stays
        # aligned whether or not a row is highlighted.
        if with_line_numbers:
            hit_marker, plain_marker = " >> ", "    "
        else:
            hit_marker, plain_marker = ">> ", "   "

        result = []
        for line_num, line in enumerate(lines, start=first_line):
            marker = hit_marker if line_num == highlight_line else plain_marker
            if with_line_numbers:
                result.append(f"{marker}{line_num:4d} | {line}")
            else:
                result.append(f"{marker}{line}")

        return "\n".join(result)

    def extract_symbol_code(
        self,
        symbol: Symbol,
        include_decorators: bool = True,
        context_before: int = 0,
        context_after: int = 0,
    ) -> str:
        """Extract the full code for a symbol.

        Args:
            symbol: The symbol to extract
            include_decorators: Whether to include decorator lines
            context_before: Extra lines before the symbol
            context_after: Extra lines after the symbol

        Returns:
            The symbol's source code, joined with newlines
        """
        start = symbol.start_line
        if include_decorators and symbol.decorators:
            # Heuristic: assumes each listed decorator occupies exactly one
            # line directly above the definition line.
            start = max(1, start - len(symbol.decorators))

        start = max(1, start - context_before)
        end = min(len(self._lines), symbol.end_line + context_after)

        return "\n".join(self.get_line_range(start, end))

    def reset(self):
        """Reset parser state to the same values __init__ sets."""
        self._source = None
        self._filepath = None
        self._lines = []
        self._parsed = False
@@ -0,0 +1,268 @@
1
+ """Python AST-based parser implementation.
2
+
3
+ Uses Python's built-in ast module for parsing Python source code.
4
+ This is the legacy fallback parser that provides reliable Python parsing.
5
+ """
6
+
7
+ import ast
8
+ from typing import Optional, List
9
+
10
+ from roma_debug.core.models import Language, Symbol, Import
11
+ from roma_debug.parsers.base import BaseParser
12
+
13
+
14
class PythonAstParser(BaseParser):
    """Python parser using the built-in ast module.

    ``parse`` builds the AST once and eagerly collects every function,
    class and import into in-memory lists; the query methods then read
    from those lists.
    """

    def __init__(self):
        """Initialize the Python AST parser with empty state."""
        super().__init__()
        self._tree: Optional[ast.AST] = None
        self._symbols: List[Symbol] = []
        self._imports: List[Import] = []

    @property
    def language(self) -> Language:
        """Return Python as the language."""
        return Language.PYTHON

    def parse(self, source: str, filepath: str = "") -> bool:
        """Parse Python source code using ast.

        Any previous state is discarded first. On failure the source,
        path and lines are still stored, but no symbols or imports are
        collected and ``is_parsed`` stays False.

        Args:
            source: Python source code
            filepath: Optional file path

        Returns:
            True if parsing succeeded, False if the source is not valid
            Python
        """
        self.reset()
        self._source = source
        self._filepath = filepath
        self._lines = source.splitlines()

        try:
            self._tree = ast.parse(source)
        except (SyntaxError, ValueError):
            # ValueError is what some Python versions raise for source
            # containing null bytes; treat it like any other invalid input.
            return False

        self._parsed = True
        self._extract_symbols()
        self._extract_imports_internal()
        return True

    def reset(self):
        """Reset parser state, including the AST, symbols and imports."""
        super().reset()
        self._tree = None
        self._symbols = []
        self._imports = []

    @staticmethod
    def _decorator_name(dec: ast.AST) -> str:
        """Return a display name for one decorator expression.

        Bare names give the identifier, dotted names give the full dotted
        text (when ``ast.unparse`` exists), and calls give the name of the
        callee. Any other expression falls back to its source text so that
        every decorator contributes exactly one entry.
        """
        can_unparse = hasattr(ast, 'unparse')

        if isinstance(dec, ast.Name):
            return dec.id
        if isinstance(dec, ast.Attribute):
            return ast.unparse(dec) if can_unparse else str(dec.attr)
        if isinstance(dec, ast.Call):
            if isinstance(dec.func, ast.Name):
                return dec.func.id
            if isinstance(dec.func, ast.Attribute):
                return dec.func.attr
        # Unusual decorator forms (subscripts, nested calls, ...): keep an
        # entry anyway so len(decorators) matches the decorator count.
        return ast.unparse(dec) if can_unparse else type(dec).__name__

    def _extract_symbols(self):
        """Extract all function and class symbols from the AST.

        Symbols are appended in depth-first order. Each symbol's parent is
        the nearest enclosing function or class symbol, if any.
        """
        if self._tree is None:
            return

        definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

        def visit_node(node: ast.AST, parent: Optional[Symbol] = None):
            symbol = None

            if isinstance(node, definition_types):
                if isinstance(node, ast.ClassDef):
                    kind = "class"
                elif parent and parent.kind == "class":
                    # A def directly inside a class is a method, sync or async.
                    kind = "method"
                elif isinstance(node, ast.AsyncFunctionDef):
                    kind = "async_function"
                else:
                    kind = "function"

                symbol = Symbol(
                    name=node.name,
                    kind=kind,
                    start_line=node.lineno,
                    end_line=node.end_lineno or node.lineno,
                    start_col=node.col_offset,
                    end_col=node.end_col_offset or 0,
                    parent=parent,
                    decorators=[
                        self._decorator_name(dec) for dec in node.decorator_list
                    ],
                    docstring=ast.get_docstring(node),
                )
                self._symbols.append(symbol)

            # Children of a definition get it as parent; otherwise the
            # current parent is passed through unchanged.
            new_parent = symbol if symbol else parent
            for child in ast.iter_child_nodes(node):
                visit_node(child, new_parent)

        visit_node(self._tree)

    def _extract_imports_internal(self):
        """Extract import statements from the AST.

        ``import a, b`` yields one Import per imported module, while
        ``from m import x, y`` yields a single Import listing all names.
        Imports are collected in ``ast.walk`` order, not line order.
        """
        if self._tree is None:
            return

        for node in ast.walk(self._tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    self._imports.append(Import(
                        module_name=alias.name,
                        alias=alias.asname,
                        imported_names=[],
                        is_relative=False,
                        relative_level=0,
                        line_number=node.lineno,
                        language=Language.PYTHON,
                    ))

            elif isinstance(node, ast.ImportFrom):
                # ``from . import x`` has no module name; store it as "".
                self._imports.append(Import(
                    module_name=node.module or "",
                    alias=None,
                    imported_names=[alias.name for alias in node.names],
                    is_relative=node.level > 0,
                    relative_level=node.level,
                    line_number=node.lineno,
                    language=Language.PYTHON,
                ))

    def find_enclosing_symbol(self, line_number: int) -> Optional[Symbol]:
        """Find the innermost symbol containing the given line.

        "Innermost" means the containing symbol that spans the fewest
        lines; on a tie the symbol collected first wins.

        Args:
            line_number: 1-based line number

        Returns:
            The innermost Symbol containing the line, or None
        """
        best_match: Optional[Symbol] = None
        best_size = float('inf')

        for symbol in self._symbols:
            if not symbol.contains_line(line_number):
                continue
            size = symbol.end_line - symbol.start_line
            if size < best_size:
                best_match = symbol
                best_size = size

        return best_match

    def extract_imports(self) -> List[Import]:
        """Return all extracted imports.

        Returns:
            A new list of Import objects
        """
        return self._imports.copy()

    def find_all_symbols(self) -> List[Symbol]:
        """Return all extracted symbols.

        Returns:
            A new list of all Symbol objects
        """
        return self._symbols.copy()

    def find_symbols_by_name(self, name: str) -> List[Symbol]:
        """Find all symbols with the given name.

        Args:
            name: Symbol name to search for

        Returns:
            List of matching Symbol objects
        """
        return [s for s in self._symbols if s.name == name]

    def find_symbols_by_kind(self, kind: str) -> List[Symbol]:
        """Find all symbols of a given kind.

        Args:
            kind: Symbol kind ('function', 'class', 'method', etc.)

        Returns:
            List of matching Symbol objects
        """
        return [s for s in self._symbols if s.kind == kind]

    def get_function_calls_in_symbol(self, symbol: Symbol) -> List[str]:
        """Extract function/method calls within a symbol.

        The symbol's AST node is located by name and start line. Calls in
        nested definitions are included, and duplicates are kept.

        Args:
            symbol: The symbol to analyze

        Returns:
            List of called function/method names; empty if nothing is
            parsed or the symbol's node cannot be found
        """
        if self._tree is None:
            return []

        def find_symbol_node(node: ast.AST) -> Optional[ast.AST]:
            """Find the AST node for the given symbol."""
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                if node.name == symbol.name and node.lineno == symbol.start_line:
                    return node
            for child in ast.iter_child_nodes(node):
                result = find_symbol_node(child)
                if result:
                    return result
            return None

        symbol_node = find_symbol_node(self._tree)
        if symbol_node is None:
            return []

        calls = []
        for node in ast.walk(symbol_node):
            if not isinstance(node, ast.Call):
                continue
            if isinstance(node.func, ast.Name):
                calls.append(node.func.id)
            elif isinstance(node.func, ast.Attribute):
                # For method calls like obj.method(), record only "method".
                calls.append(node.func.attr)

        return calls
@@ -0,0 +1,196 @@
1
+ """Parser registry for language detection and parser dispatch.
2
+
3
+ Provides centralized parser management and language detection from file extensions.
4
+ """
5
+
6
+ import os
7
+ from typing import Dict, Optional, Type, Callable
8
+
9
+ from roma_debug.core.models import Language
10
+ from roma_debug.parsers.base import BaseParser
11
+
12
+
13
class ParserRegistry:
    """Registry for language parsers.

    Maps each language to a parser class and, optionally, a factory used
    to build instances instead of calling the class. One instance per
    language is cached and reused unless a caller asks for a new one.
    """

    def __init__(self):
        """Initialize an empty registry."""
        self._parsers: Dict[Language, Type[BaseParser]] = {}
        self._parser_factories: Dict[Language, Callable[[], BaseParser]] = {}
        self._instances: Dict[Language, BaseParser] = {}

    def register(
        self,
        language: Language,
        parser_class: Type[BaseParser],
        factory: Optional[Callable[[], BaseParser]] = None,
    ):
        """Register a parser for a language, replacing any earlier one.

        Args:
            language: The language this parser handles
            parser_class: The parser class
            factory: Optional factory function to create parser instances
        """
        self._parsers[language] = parser_class
        if factory is not None:
            self._parser_factories[language] = factory
        else:
            # Drop a factory left over from an earlier registration;
            # otherwise it would keep being used instead of parser_class.
            self._parser_factories.pop(language, None)
        # A cached instance belongs to the previous registration.
        self._instances.pop(language, None)

    def get_parser(
        self,
        language: Language,
        create_new: bool = False,
    ) -> Optional[BaseParser]:
        """Get a parser for the given language.

        Args:
            language: The language to get a parser for
            create_new: If True, create a new instance instead of reusing
                the cached one; the new instance is not cached

        Returns:
            Parser instance or None if no parser registered
        """
        if language not in self._parsers:
            return None

        if not create_new and language in self._instances:
            return self._instances[language]

        # Prefer the registered factory, falling back to the class itself.
        builder = self._parser_factories.get(language, self._parsers[language])
        parser = builder()
        if not create_new:
            self._instances[language] = parser
        return parser

    def get_parser_for_file(
        self,
        filepath: str,
        create_new: bool = False,
    ) -> Optional[BaseParser]:
        """Get a parser based on file extension.

        Args:
            filepath: Path to the file
            create_new: If True, create a new instance

        Returns:
            Parser instance or None if language not supported
        """
        language = detect_language(filepath)
        return self.get_parser(language, create_new)

    def supports_language(self, language: Language) -> bool:
        """Check if a language is supported.

        Args:
            language: The language to check

        Returns:
            True if a parser is registered for this language
        """
        return language in self._parsers

    def supports_file(self, filepath: str) -> bool:
        """Check if a file type is supported.

        Args:
            filepath: Path to the file

        Returns:
            True if the file's language has a registered parser
        """
        language = detect_language(filepath)
        return self.supports_language(language)

    @property
    def supported_languages(self) -> list:
        """Get list of supported languages, in registration order."""
        return list(self._parsers)

    def clear_instances(self):
        """Clear cached parser instances; registrations are kept."""
        self._instances.clear()
119
+
120
+
121
+ # Global registry instance
122
+ _registry = ParserRegistry()
123
+
124
+
125
def detect_language(filepath: str) -> Language:
    """Determine the language of a source file from its path.

    Only the extension of the path is considered; it is handed to
    ``Language.from_extension`` exactly as ``os.path.splitext`` returns it.

    Args:
        filepath: Path to the source file

    Returns:
        Language enum value
    """
    extension = os.path.splitext(filepath)[1]
    return Language.from_extension(extension)
136
+
137
+
138
def get_parser(
    filepath_or_language,
    create_new: bool = False,
) -> Optional[BaseParser]:
    """Look up a parser in the global registry.

    Args:
        filepath_or_language: Either a Language enum value or a file path;
            anything that is not a Language is treated as a path
        create_new: If True, create a new parser instance

    Returns:
        Parser instance or None if not supported
    """
    target = filepath_or_language
    if not isinstance(target, Language):
        # Not a Language: resolve the language from the file path.
        return _registry.get_parser_for_file(target, create_new)
    return _registry.get_parser(target, create_new)
154
+
155
+
156
def register_parser(
    language: Language,
    parser_class: Type[BaseParser],
    factory: Optional[Callable[[], BaseParser]] = None,
):
    """Add a parser to the global registry.

    Thin wrapper that forwards its arguments to the ``register`` method
    of the module-level registry.

    Args:
        language: The language this parser handles
        parser_class: The parser class
        factory: Optional factory function
    """
    _registry.register(
        language=language,
        parser_class=parser_class,
        factory=factory,
    )
169
+
170
+
171
def get_registry() -> ParserRegistry:
    """Return the module-level ParserRegistry.

    This is the same instance that the module-level ``get_parser`` and
    ``register_parser`` functions operate on.

    Returns:
        The global ParserRegistry instance
    """
    return _registry
178
+
179
+
180
+ # Register built-in parsers
181
def _register_builtin_parsers():
    """Register the built-in parsers.

    The AST-based Python parser is always registered. The tree-sitter
    parser module is then imported so it can register itself; if that
    import fails, only the Python parser is available.
    """
    import logging

    from roma_debug.parsers.python_ast_parser import PythonAstParser

    register_parser(Language.PYTHON, PythonAstParser)

    # Import tree-sitter parser module to trigger its auto-registration.
    # This allows graceful degradation if tree-sitter is not installed.
    try:
        import roma_debug.parsers.treesitter_parser  # noqa: F401
    except ImportError as exc:
        # Still best-effort, but record why: an ImportError can also come
        # from a broken install rather than a missing optional dependency.
        logging.getLogger(__name__).debug(
            "tree-sitter parsers not registered: %s", exc
        )
193
+
194
+
195
+ # Auto-register built-in parsers on module import
196
+ _register_builtin_parsers()