roma-debug 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,314 @@
1
+ """Multi-language traceback/stack trace patterns.
2
+
3
+ Provides regex patterns and parsers for extracting file locations
4
+ from error tracebacks in multiple programming languages.
5
+ """
6
+
7
+ import re
8
+ from typing import Optional, List, Tuple, Pattern
9
+
10
+ from roma_debug.core.models import Language, TraceFrame, ParsedTraceback
11
+
12
+
13
# Compiled regex patterns for each language's traceback format.
#
# Consumers index the capture groups positionally, so the layout per language
# matters:
#   Python         (file, line[, function])
#   JavaScript/TS  (function, file, line, column) or (file, line, column)
#   Go             (file, line)
#   Rust           (file, line[, column])
#   Java/Kotlin    (qualified method, file, line)
#   C#             (qualified method, file, line)
#   Ruby           (file, line[, method])
#   PHP            (file, line[, function])
TRACEBACK_PATTERNS: dict[Language, List[Pattern]] = {
    # Python: File "path/to/file.py", line 10, in function_name
    Language.PYTHON: [
        # The optional angle brackets let pseudo-names such as <module> and
        # <lambda> be captured instead of being silently dropped.
        re.compile(r'File ["\'](.+?)["\'], line (\d+)(?:, in (<?\w+>?))?'),
        # Alternative format sometimes seen.
        # NOTE: compiled without re.MULTILINE, so ^ anchors to the start of
        # the whole input, not to each line.
        re.compile(r'^\s+(.+\.py):(\d+)'),
    ],

    # JavaScript/Node.js: at functionName (path/to/file.js:10:5) or path/to/file.js:10:5
    Language.JAVASCRIPT: [
        re.compile(r'at\s+(?:(\w+(?:\.\w+)*)\s+)?\(?(.+?):(\d+):(\d+)\)?'),
        # NOTE: no re.MULTILINE; only matches a frame at the very start of the input.
        re.compile(r'^\s+at\s+(.+?):(\d+):(\d+)'),
        # functionName@file:line:column style (as emitted by Firefox/Safari)
        re.compile(r'(\w+)?@(.+?):(\d+):(\d+)'),
    ],

    # TypeScript: same as JavaScript but restricted to .ts/.tsx files
    Language.TYPESCRIPT: [
        re.compile(r'at\s+(?:(\w+(?:\.\w+)*)\s+)?\(?(.+?\.tsx?):(\d+):(\d+)\)?'),
        # NOTE: no re.MULTILINE; only matches a frame at the very start of the input.
        re.compile(r'^\s+at\s+(.+?\.tsx?):(\d+):(\d+)'),
    ],

    # Go: goroutine N [running]:
    #     path/to/file.go:123 +0x1a2
    #     main.functionName(...)
    Language.GO: [
        re.compile(r'^\s*(.+\.go):(\d+)(?:\s+\+0x[0-9a-f]+)?', re.MULTILINE),
        re.compile(r'[\t\s]+(/?\S+\.go):(\d+)', re.MULTILINE),
        # panic location
        re.compile(r'panic.*at\s+(.+\.go):(\d+)'),
    ],

    # Rust: panicked at 'message', src/main.rs:10:5
    #       thread 'main' panicked at src/main.rs:10:5
    Language.RUST: [
        re.compile(r"panicked at ['\"]?(.+?\.rs)['\"]?:(\d+):(\d+)"),
        re.compile(r"panicked at .+?, (.+?\.rs):(\d+):(\d+)"),
        # Backtrace format.
        # NOTE: both patterns below lack re.MULTILINE, so ^ anchors to the
        # start of the whole input only.
        re.compile(r'^\s*\d+:\s+.+\s+at\s+(.+?\.rs):(\d+):(\d+)'),
        re.compile(r'^\s+(\S+\.rs):(\d+)'),
    ],

    # Java: at com.example.Class.method(File.java:10)
    Language.JAVA: [
        # A second, semantically identical .java pattern used to follow this
        # one and caused every Java frame to be matched twice; it was removed.
        re.compile(r'at\s+([\w$.]+)\(([\w]+\.java):(\d+)\)'),
        # Kotlin (often mixed with Java)
        re.compile(r'at\s+([\w$.]+)\(([\w]+\.kt):(\d+)\)'),
    ],

    # C#/.NET: at Namespace.Class.Method() in path/to/file.cs:line 10
    Language.CSHARP: [
        re.compile(r'at\s+([\w.]+)\(\)\s+in\s+(.+?\.cs):line\s+(\d+)'),
        re.compile(r'at\s+([\w.]+)\s+in\s+(.+?\.cs):(\d+)'),
    ],

    # Ruby: from path/to/file.rb:10:in `method_name'
    Language.RUBY: [
        re.compile(r"from (.+?\.rb):(\d+)(?::in [`'](\w+)')?"),
        # NOTE: no re.MULTILINE; only matches at the very start of the input.
        re.compile(r"^\s*(.+?\.rb):(\d+):in [`'](\w+)'"),
    ],

    # PHP: in /path/to/file.php on line 10
    #      /path/to/file.php(10): function()
    Language.PHP: [
        re.compile(r'in\s+(.+?\.php)\s+on\s+line\s+(\d+)'),
        re.compile(r'(.+?\.php)\((\d+)\):\s*(\w+)?'),
    ],
}
83
+
84
# Error type patterns for each language.
#
# Every expression is compiled with re.MULTILINE, so ^ and $ anchor to
# individual lines of the traceback. Expressions with two groups capture
# (error type, message); expressions with a single group capture only the
# message.
ERROR_TYPE_PATTERNS: dict[Language, List[Pattern]] = {
    lang: [re.compile(expression, re.MULTILINE) for expression in expressions]
    for lang, expressions in (
        (Language.PYTHON, (
            r'^(\w+Error):\s*(.+)$',
            r'^(\w+Exception):\s*(.+)$',
            r'^(\w+Warning):\s*(.+)$',
        )),
        (Language.JAVASCRIPT, (
            r'^(\w*Error):\s*(.+)$',
            r'^Uncaught\s+(\w+):\s*(.+)$',
        )),
        (Language.TYPESCRIPT, (
            r'^(\w*Error):\s*(.+)$',
            r'^TSError:\s*(.+)$',
        )),
        (Language.GO, (
            r'^panic:\s*(.+)$',
            r'^fatal error:\s*(.+)$',
        )),
        (Language.RUST, (
            r"thread '[\w-]+' panicked at ['\"](.+?)['\"]",
            r'^error\[E\d+\]:\s*(.+)$',
        )),
        (Language.JAVA, (
            r'^([\w.]+Exception):\s*(.+)$',
            r'^([\w.]+Error):\s*(.+)$',
            r'^Caused by:\s*([\w.]+):\s*(.+)$',
        )),
    )
}
113
+
114
+
115
def detect_traceback_language(traceback: str) -> Language:
    """Guess which language produced a traceback.

    Each candidate language owns a list of marker substrings. A language
    scores one point per marker that occurs anywhere in ``traceback``, and
    the highest-scoring language wins. When several languages tie, the one
    listed first below is returned.

    Args:
        traceback: The traceback/stack trace string

    Returns:
        The best-scoring Language, or Language.UNKNOWN when no marker
        matched at all
    """
    markers_by_language = {
        Language.PYTHON: ['File "', "Traceback (most recent call last):", ".py\", line"],
        Language.JAVASCRIPT: ["at ", ".js:", "node_modules/", "Error:", " at "],
        Language.TYPESCRIPT: [".ts:", ".tsx:", "TSError"],
        Language.GO: ["goroutine", ".go:", "panic:", "runtime error:"],
        Language.RUST: ["panicked at", ".rs:", "thread '", "RUST_BACKTRACE"],
        Language.JAVA: [".java:", "at ", "Exception", "Caused by:"],
        Language.CSHARP: [".cs:", "at ", " in ", ":line "],
        Language.RUBY: [".rb:", "from ", ":in `"],
        Language.PHP: [".php", "on line", "Stack trace:"],
    }

    # One point per marker found; plain substring tests, no regexes.
    hit_counts = {
        candidate: sum(1 for marker in markers if marker in traceback)
        for candidate, markers in markers_by_language.items()
    }

    # max() yields the first maximal key, i.e. declaration order breaks ties.
    winner = max(hit_counts, key=hit_counts.get)
    return winner if hit_counts[winner] > 0 else Language.UNKNOWN
150
+
151
+
152
def parse_traceback(traceback: str, language: Optional[Language] = None) -> ParsedTraceback:
    """Turn a raw traceback string into a ParsedTraceback.

    Args:
        traceback: The traceback/stack trace string
        language: Language to parse as; when None it is auto-detected
            with detect_traceback_language()

    Returns:
        ParsedTraceback holding the extracted frames, the error type and
        message (either may be None), the language used, and the raw input
    """
    resolved = detect_traceback_language(traceback) if language is None else language

    found_frames = extract_frames(traceback, resolved)
    kind, message = extract_error_info(traceback, resolved)

    return ParsedTraceback(
        frames=found_frames,
        error_type=kind,
        error_message=message,
        language=resolved,
        raw_traceback=traceback,
    )
175
+
176
+
177
def extract_frames(traceback: str, language: Language) -> List[TraceFrame]:
    """Extract stack frames from a traceback.

    Every pattern registered for ``language`` in TRACEBACK_PATTERNS is run
    over the whole text, and each match is converted to a TraceFrame using
    that language's capture-group layout. For Language.UNKNOWN a generic
    ``file:line[:column]`` pattern is used instead.

    Frames are returned pattern by pattern, in match order within each
    pattern. When several patterns capture the same location in the text
    (same file-path offset and line number), only the first is kept, so
    overlapping patterns do not yield duplicate frames. Identical file/line
    pairs at different offsets (e.g. recursion) are all preserved.

    Args:
        traceback: The traceback string
        language: The language of the traceback

    Returns:
        List of TraceFrame objects; empty if nothing matched
    """
    patterns = TRACEBACK_PATTERNS.get(language, [])

    # Unknown language: fall back to a generic file:line[:column] matcher.
    if language == Language.UNKNOWN:
        patterns = [
            re.compile(r'(?:at\s+)?(.+?):(\d+)(?::(\d+))?'),
        ]

    frames: List[TraceFrame] = []
    seen = set()  # (offset of the file path in the text, line number)

    for pattern in patterns:
        for match in pattern.finditer(traceback):
            groups = match.groups()

            # Default layout: file in group 0, line in group 1.
            file_idx, line_idx = 0, 1
            function_name = None
            column = None

            if language == Language.PYTHON:
                # (file, line[, function])
                function_name = groups[2] if len(groups) > 2 else None

            elif language in (Language.JAVASCRIPT, Language.TYPESCRIPT):
                if len(groups) >= 4:
                    # (function, file, line, column)
                    function_name = groups[0]
                    file_idx, line_idx = 1, 2
                    column = int(groups[3]) if groups[3] else None
                else:
                    # (file, line, column)
                    column = int(groups[2]) if len(groups) > 2 and groups[2] else None

            elif language == Language.GO:
                # (file, line) only; nothing else to extract.
                pass

            elif language == Language.RUST:
                # (file, line[, column])
                column = int(groups[2]) if len(groups) > 2 and groups[2] else None

            elif language in (Language.JAVA, Language.CSHARP):
                # (qualified method, file, line). C# used to fall through to
                # the generic branch, which called int() on the file path
                # and raised ValueError.
                function_name = groups[0] if groups[0] else None
                file_idx, line_idx = 1, 2

            elif language in (Language.RUBY, Language.PHP):
                # (file, line[, method]). The third group is a name, not a
                # column; treating it as a column raised ValueError.
                function_name = groups[2] if len(groups) > 2 and groups[2] else None

            else:
                # Generic fallback: (file, line[, column])
                column = int(groups[2]) if len(groups) > 2 and groups[2] else None

            filepath = groups[file_idx] if len(groups) > file_idx else None
            raw_line = groups[line_idx] if len(groups) > line_idx else None
            line_number = int(raw_line) if raw_line else 0

            # Skip if we couldn't get a valid file and line
            if not filepath or not line_number:
                continue

            # Drop frames already captured from the same spot by another
            # pattern. Match.start() takes 1-based group numbers.
            key = (match.start(file_idx + 1), line_number)
            if key in seen:
                continue
            seen.add(key)

            frames.append(
                TraceFrame(
                    filepath=filepath,
                    line_number=line_number,
                    function_name=function_name,
                    column_number=column,
                    language=language,
                )
            )

    return frames
260
+
261
+
262
def extract_error_info(traceback: str, language: Language) -> Tuple[Optional[str], Optional[str]]:
    """Find the error type and message reported in a traceback.

    The language-specific expressions in ERROR_TYPE_PATTERNS are tried in
    order; the first one that matches and has at least one capture group
    decides the result. If none applies, a few language-agnostic
    ``Error:`` / ``Exception:`` / ``fatal:`` line patterns are tried.

    Args:
        traceback: The traceback string
        language: The language of the traceback

    Returns:
        Tuple of (error_type, error_message). error_type is None when the
        matching pattern captures only a message; both are None when
        nothing matched.
    """
    for matcher in ERROR_TYPE_PATTERNS.get(language, []):
        found = matcher.search(traceback)
        if found is None:
            continue

        captured = found.groups()
        if len(captured) >= 2:
            # Two-group patterns capture (type, message).
            return captured[0], captured[1]
        if len(captured) == 1:
            # Single-group patterns capture the message only.
            return None, captured[0]

    # Generic fallback: look for common error line prefixes.
    fallback_expressions = (
        r'^Error:\s*(.+)$',
        r'^Exception:\s*(.+)$',
        r'^fatal:\s*(.+)$',
    )
    for expression in fallback_expressions:
        found = re.search(expression, traceback, re.MULTILINE)
        if found is not None:
            return None, found.group(1)

    return None, None
296
+
297
+
298
def extract_file_line_pairs(traceback: str, language: Optional[Language] = None) -> List[Tuple[str, int]]:
    """Return just the (filepath, line_number) pairs found in a traceback.

    Convenience wrapper around extract_frames() for callers that do not
    need the full frame objects.

    Args:
        traceback: The traceback string
        language: Optional language hint; auto-detected when None

    Returns:
        List of (filepath, line_number) tuples, one per extracted frame
    """
    resolved = language if language is not None else detect_traceback_language(traceback)

    pairs = []
    for frame in extract_frames(traceback, resolved):
        pairs.append((frame.filepath, frame.line_number))
    return pairs