roma-debug 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,569 @@
1
+ """Project scanner for deep project awareness.
2
+
3
+ Scans project structure to understand:
4
+ - Project type (Flask, FastAPI, Express, etc.)
5
+ - Entry points (main.py, app.py, server.py)
6
+ - File structure and relationships
7
+ - Configuration files
8
+ """
9
+
10
import fnmatch
import json
import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Set

from roma_debug.core.models import Language
18
+
19
+
20
@dataclass
class ProjectFile:
    """A single file recorded for a project.

    The instance is plain data: every attribute is stored exactly as it was
    passed in, and the two properties below are derived from ``path`` only.
    """

    # Path of the file as supplied by whoever created the instance.
    path: str
    # Language classification of the file.
    language: Language
    # True when the file was flagged as a likely program entry point.
    is_entry_point: bool = False
    # True when the file was flagged as a configuration file.
    is_config: bool = False
    # Size value supplied by the creator; defaults to 0.
    size: int = 0

    @property
    def filename(self) -> str:
        """Return the last component of ``path``."""
        _, tail = os.path.split(self.path)
        return tail

    @property
    def relative_path(self) -> str:
        """Return ``path`` unchanged (alias kept for readability at call sites)."""
        return self.path
36
+
37
+
38
@dataclass
class ProjectInfo:
    """Result of a project scan: classification plus file inventories."""

    root: str
    project_type: str  # 'flask', 'fastapi', 'express', 'go', 'rust', 'unknown'
    primary_language: Language
    entry_points: List[ProjectFile] = field(default_factory=list)
    source_files: List[ProjectFile] = field(default_factory=list)
    config_files: List[ProjectFile] = field(default_factory=list)
    frameworks_detected: List[str] = field(default_factory=list)

    def get_files_by_language(self, language: Language) -> List[ProjectFile]:
        """Return the source files whose language equals ``language``."""
        matching: List[ProjectFile] = []
        for candidate in self.source_files:
            if candidate.language == language:
                matching.append(candidate)
        return matching

    def find_file(self, filename: str) -> Optional[ProjectFile]:
        """Return the first source file whose path contains ``filename``.

        The comparison is a case-insensitive substring test against the whole
        path, so directory names can match as well. Returns None when nothing
        matches.
        """
        needle = filename.lower()
        return next(
            (
                candidate
                for candidate in self.source_files
                if needle in candidate.path.lower()
            ),
            None,
        )

    def find_files_by_pattern(self, pattern: str) -> List[ProjectFile]:
        """Return source files whose path matches ``pattern`` (case-insensitive regex search)."""
        search = re.compile(pattern, re.IGNORECASE).search
        return [candidate for candidate in self.source_files if search(candidate.path)]

    def to_summary(self) -> str:
        """Build a multi-line, human-readable overview of the project.

        At most the first five entry points are listed by path.
        """
        framework_text = ', '.join(self.frameworks_detected) or 'None detected'
        summary = [
            f"Project Type: {self.project_type}",
            f"Primary Language: {self.primary_language.value}",
            f"Frameworks: {framework_text}",
            f"Entry Points: {len(self.entry_points)}",
            f"Source Files: {len(self.source_files)}",
        ]

        if self.entry_points:
            summary.append("\nEntry Points:")
            summary.extend(f" - {ep.path}" for ep in self.entry_points[:5])

        return "\n".join(summary)
82
+
83
+
84
# Entry-point filename patterns, keyed by language.
#
# Every value is a list of regular expressions. Patterns that contain a
# directory part (e.g. ``src/``) are written with forward slashes.
ENTRY_POINT_PATTERNS = {
    Language.PYTHON: [
        r'^main\.py$', r'^app\.py$', r'^server\.py$',
        r'^run\.py$', r'^wsgi\.py$', r'^asgi\.py$',
        r'^manage\.py$', r'^__main__\.py$', r'^index\.py$',
    ],
    Language.JAVASCRIPT: [
        r'^index\.js$', r'^app\.js$', r'^server\.js$', r'^main\.js$',
        r'^src/index\.js$',
    ],
    Language.TYPESCRIPT: [
        r'^index\.ts$', r'^app\.ts$', r'^server\.ts$', r'^main\.ts$',
        r'^src/index\.ts$',
    ],
    Language.GO: [
        r'^main\.go$',
        r'^cmd/.*\.go$',
    ],
    Language.RUST: [
        r'^main\.rs$', r'^lib\.rs$',
        r'^src/main\.rs$', r'^src/lib\.rs$',
    ],
    Language.JAVA: [
        r'^Main\.java$', r'^App\.java$', r'^Application\.java$',
    ],
}
127
+
128
# Framework detection patterns.
#
# Each framework name maps to a list of ``(regex, language)`` pairs. A pair
# whose language is ``None`` is a path check (the regex is searched in the
# file path); every other pair is searched in the content of files whose
# language equals the pair's language.
#
# JavaScript-ecosystem frameworks are registered for both JavaScript and
# TypeScript: the language comparison is an exact equality test, so patterns
# tagged only as JavaScript would never fire for TypeScript files.
def _js_and_ts(*patterns):
    """Pair every regex with both the JavaScript and TypeScript languages."""
    return [
        (pattern, lang)
        for pattern in patterns
        for lang in (Language.JAVASCRIPT, Language.TYPESCRIPT)
    ]


FRAMEWORK_PATTERNS = {
    'flask': [
        (r'from\s+flask\s+import', Language.PYTHON),
        (r'import\s+flask', Language.PYTHON),
        (r'Flask\s*\(', Language.PYTHON),
    ],
    'fastapi': [
        (r'from\s+fastapi\s+import', Language.PYTHON),
        (r'FastAPI\s*\(', Language.PYTHON),
    ],
    'django': [
        (r'from\s+django', Language.PYTHON),
        (r'import\s+django', Language.PYTHON),
        (r'DJANGO_SETTINGS_MODULE', Language.PYTHON),
    ],
    'express': _js_and_ts(
        r'require\s*\(\s*[\'"]express[\'"]\s*\)',
        r'from\s+[\'"]express[\'"]',
        r'express\s*\(\s*\)',
    ),
    'react': _js_and_ts(
        r'from\s+[\'"]react[\'"]',
        r'import\s+React',
        r'React\.createElement',
    ),
    'vue': _js_and_ts(
        r'from\s+[\'"]vue[\'"]',
        r'createApp',
    ) + [
        (r'\.vue$', None),  # File extension check
    ],
    'gin': [
        (r'github\.com/gin-gonic/gin', Language.GO),
    ],
    'actix': [
        (r'actix_web', Language.RUST),
    ],
    'spring': [
        (r'org\.springframework', Language.JAVA),
        (r'@SpringBootApplication', Language.JAVA),
    ],
}
170
+
171
# File names that are recorded as configuration files.
CONFIG_FILES = [
    # JavaScript / Python packaging
    'package.json', 'requirements.txt', 'setup.py', 'pyproject.toml', 'Pipfile',
    # Go / Rust / JVM build descriptors
    'go.mod', 'Cargo.toml', 'pom.xml', 'build.gradle',
    # Environment and application settings
    '.env', 'config.py', 'config.js', 'config.json', 'settings.py',
    # Containers
    'docker-compose.yml', 'Dockerfile',
]
190
+
191
# Directory names that are pruned from the project walk.
SKIP_DIRS = {
    # Dependency and virtual-environment trees
    'node_modules', 'venv', 'env', '.venv', '.env',
    # Version-control metadata
    '.git', '.svn', '.hg',
    # Build output
    'dist', 'build', 'target', 'eggs', '*.egg-info',
    # Tool caches and reports
    '__pycache__', '.pytest_cache', '.mypy_cache', 'coverage',
    # Editor / IDE state
    '.idea', '.vscode',
}
213
+
214
+
215
class ProjectScanner:
    """Scans and analyzes project structure.

    A scan walks ``project_root`` once, records config files, classifies the
    remaining files by language, flags entry points, searches file contents
    for framework markers and caches the resulting ``ProjectInfo`` on the
    instance so later calls are free.
    """

    # Source files (in addition to every entry point) inspected for frameworks.
    _FRAMEWORK_SAMPLE_SIZE = 50
    # Characters read from the start of each file during framework detection.
    _FRAMEWORK_SNIFF_CHARS = 10240
    # Entry-point content beyond this many characters is cut in the AI context.
    _CONTEXT_SNIPPET_CHARS = 2000
    # The first detected framework in this order names the project type.
    _FRAMEWORK_PRIORITY = (
        'flask', 'fastapi', 'django', 'express', 'gin',
        'actix', 'spring', 'react', 'vue',
    )

    def __init__(self, project_root: str, max_files: int = 1000):
        """Initialize the scanner.

        Args:
            project_root: Root directory of the project
            max_files: Maximum number of source files to record
        """
        self.project_root = os.path.abspath(project_root)
        self.max_files = max_files
        self._project_info: Optional[ProjectInfo] = None

    @staticmethod
    def _should_skip_dir(dirname: str) -> bool:
        """Return True when a directory must not be descended into.

        Hidden directories are always skipped. ``SKIP_DIRS`` entries are
        treated as shell-style patterns so that glob entries such as
        ``*.egg-info`` take effect; plain names still match only themselves.
        """
        if dirname.startswith('.') or dirname in SKIP_DIRS:
            return True
        return any(fnmatch.fnmatchcase(dirname, pattern) for pattern in SKIP_DIRS)

    def scan(self) -> ProjectInfo:
        """Scan the project and return project info.

        The result is cached; subsequent calls return the same object.

        Returns:
            ProjectInfo with project structure analysis
        """
        if self._project_info is not None:
            return self._project_info

        source_files: List[ProjectFile] = []
        config_files: List[ProjectFile] = []
        entry_points: List[ProjectFile] = []
        language_counts: Dict[Language, int] = {}

        for root, dirs, files in os.walk(self.project_root):
            # Prune in place so os.walk never descends into skipped trees.
            # Sorting makes the traversal order - and therefore which files
            # survive the max_files cap - reproducible across filesystems.
            dirs[:] = sorted(d for d in dirs if not self._should_skip_dir(d))

            for filename in sorted(files):
                # Only source files count towards the cap (config files don't).
                if len(source_files) >= self.max_files:
                    break

                filepath = os.path.join(root, filename)
                rel_path = os.path.relpath(filepath, self.project_root)

                if filename in CONFIG_FILES:
                    try:
                        size = os.path.getsize(filepath)
                    except OSError:
                        # A config file that cannot be stat'ed is left out.
                        continue
                    config_files.append(ProjectFile(
                        path=rel_path,
                        language=Language.UNKNOWN,
                        is_config=True,
                        size=size,
                    ))
                    continue

                language = self._detect_language(filename)
                if language == Language.UNKNOWN:
                    continue

                try:
                    size = os.path.getsize(filepath)
                except OSError:
                    # Source files are still recorded, with an unknown size.
                    size = 0

                pf = ProjectFile(
                    path=rel_path,
                    language=language,
                    is_entry_point=self._is_entry_point(rel_path, language),
                    size=size,
                )
                source_files.append(pf)
                if pf.is_entry_point:
                    entry_points.append(pf)
                language_counts[language] = language_counts.get(language, 0) + 1

            if len(source_files) >= self.max_files:
                break

        # Inspect every entry point plus the leading source files. Entry
        # points inside that leading slice are dropped from it so that no
        # file is opened twice.
        sample = entry_points + [
            pf for pf in source_files[:self._FRAMEWORK_SAMPLE_SIZE]
            if not pf.is_entry_point
        ]
        frameworks_detected = self._detect_frameworks(sample)

        if language_counts:
            primary_language = max(language_counts, key=language_counts.get)
        else:
            primary_language = Language.UNKNOWN

        project_type = self._determine_project_type(frameworks_detected, primary_language)

        self._project_info = ProjectInfo(
            root=self.project_root,
            project_type=project_type,
            primary_language=primary_language,
            entry_points=entry_points,
            source_files=source_files,
            config_files=config_files,
            # Sorted so the list order does not depend on set iteration order.
            frameworks_detected=sorted(frameworks_detected),
        )

        return self._project_info

    def _detect_language(self, filename: str) -> Language:
        """Detect language from the (lower-cased) filename extension."""
        ext = os.path.splitext(filename)[1].lower()
        return Language.from_extension(ext)

    def _is_entry_point(self, rel_path: str, language: Language) -> bool:
        """Check if file is likely an entry point.

        Each pattern registered for ``language`` is tried, case-insensitively,
        against both the bare filename and the project-relative path.
        """
        patterns = ENTRY_POINT_PATTERNS.get(language, [])
        filename = os.path.basename(rel_path)
        # Patterns are written with '/', but rel_path uses the OS separator;
        # normalise so directory patterns also match on Windows.
        normalized = rel_path.replace(os.sep, '/')

        return any(
            re.match(pattern, filename, re.IGNORECASE)
            or re.match(pattern, normalized, re.IGNORECASE)
            for pattern in patterns
        )

    def _detect_frameworks(self, files: List[ProjectFile]) -> Set[str]:
        """Detect frameworks from file contents.

        Only the beginning of each file is read. Files that cannot be opened
        are skipped.

        Args:
            files: Files to inspect

        Returns:
            Names of all frameworks with at least one matching pattern
        """
        frameworks: Set[str] = set()

        for pf in files:
            filepath = os.path.join(self.project_root, pf.path)

            try:
                with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read(self._FRAMEWORK_SNIFF_CHARS)
            except OSError:
                continue

            for framework, patterns in FRAMEWORK_PATTERNS.items():
                if framework in frameworks:
                    continue  # Already detected from an earlier file
                for pattern, lang in patterns:
                    if lang is None:
                        # Language-less patterns are path (extension) checks
                        matched = re.search(pattern, pf.path)
                    elif lang == pf.language:
                        matched = re.search(pattern, content)
                    else:
                        matched = None
                    if matched:
                        frameworks.add(framework)
                        break

        return frameworks

    def _determine_project_type(self, frameworks: Set[str], primary_lang: Language) -> str:
        """Determine project type from frameworks and language.

        The highest-priority detected framework wins; without any framework
        the primary language decides, falling back to ``'unknown'``.
        """
        for framework in self._FRAMEWORK_PRIORITY:
            if framework in frameworks:
                return framework

        # Fall back to language
        lang_types = {
            Language.PYTHON: 'python',
            Language.JAVASCRIPT: 'javascript',
            Language.TYPESCRIPT: 'typescript',
            Language.GO: 'go',
            Language.RUST: 'rust',
            Language.JAVA: 'java',
        }
        return lang_types.get(primary_lang, 'unknown')

    def find_relevant_files(self, error_message: str, limit: int = 10) -> List[ProjectFile]:
        """Find files relevant to an error message.

        Uses keyword extraction and pattern matching to find files
        that might be related to the error.

        Args:
            error_message: The error message to analyze
            limit: Maximum number of files to return; zero or a negative
                value yields an empty list

        Returns:
            List of potentially relevant ProjectFiles, best match first
        """
        info = self.scan()  # Returns the cached result when already scanned

        keywords = self._extract_keywords(error_message)

        scored_files: List[tuple] = []
        for pf in info.source_files:
            score = self._score_relevance(pf, keywords, error_message)
            if score > 0:
                scored_files.append((score, pf))

        # Stable sort: files with equal scores keep their scan order.
        scored_files.sort(key=lambda item: item[0], reverse=True)

        # Clamp so a negative limit cannot turn into a "drop the tail" slice.
        return [pf for _, pf in scored_files[:max(limit, 0)]]

    def _extract_keywords(self, error_message: str) -> Set[str]:
        """Extract relevant keywords from error message.

        Collects file names, path/route segments, CamelCase and snake_case
        identifiers and quoted strings, then removes common filler words.

        Args:
            error_message: The error message to analyze

        Returns:
            Set of keywords (lower-cased, except snake_case identifiers which
            are lower-case by construction)
        """
        keywords: Set[str] = set()

        # Common error-related words to ignore
        ignore_words = {
            'error', 'exception', 'failed', 'cannot', 'could', 'not',
            'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
            'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
            'at', 'in', 'on', 'to', 'for', 'of', 'with', 'by', 'from',
            'get', 'post', 'put', 'delete', 'http', 'https',
        }

        # Extract potential file names. The trailing \b keeps an extension
        # from matching as a prefix of a longer one: without it "config.json"
        # produced "config.js" and "App.tsx" produced "app.ts".
        file_patterns = [
            r'[\w\-]+\.(?:py|js|ts|go|rs|java|jsx|tsx)\b',
            r'/[\w\-/]+\.(?:py|js|ts|go|rs|java|jsx|tsx)\b',
        ]
        for pattern in file_patterns:
            matches = re.findall(pattern, error_message, re.IGNORECASE)
            keywords.update(m.lower() for m in matches)

        # Extract route patterns; very short segments carry no signal
        for route in re.findall(r'/[\w\-/]+', error_message):
            parts = route.strip('/').split('/')
            keywords.update(p.lower() for p in parts if len(p) > 2)

        # Extract identifiers (CamelCase, snake_case)
        camel_case = re.findall(r'\b[A-Z][a-z]+(?:[A-Z][a-z]+)+\b', error_message)
        keywords.update(name.lower() for name in camel_case)

        snake_case = re.findall(r'\b[a-z]+(?:_[a-z]+)+\b', error_message)
        keywords.update(snake_case)

        # Extract quoted strings (URLs are not useful as file keywords)
        for quoted in re.findall(r'[\'"]([^\'"]+)[\'"]', error_message):
            if len(quoted) > 2 and not quoted.startswith('http'):
                keywords.add(quoted.lower())

        # Remove ignored words
        keywords -= ignore_words

        return keywords

    def _score_relevance(self, pf: ProjectFile, keywords: Set[str], error_message: str) -> float:
        """Score how relevant a file is to the error.

        Args:
            pf: File to score
            keywords: Keywords extracted from the error message
            error_message: The raw error message

        Returns:
            Non-negative score; higher means more relevant
        """
        score = 0.0
        path_lower = pf.path.lower()
        filename_lower = pf.filename.lower()
        message_lower = error_message.lower()

        # Entry points get a boost
        if pf.is_entry_point:
            score += 2.0

        # A keyword in the filename counts more than one elsewhere in the path
        for kw in keywords:
            if kw in filename_lower:
                score += 3.0
            elif kw in path_lower:
                score += 1.5

        # Route-related files for HTTP errors
        if 'cannot get' in message_lower or '404' in error_message:
            route_markers = ('route', 'app', 'server', 'index', 'view', 'controller')
            if any(marker in filename_lower for marker in route_markers):
                score += 2.0

        # Static file serving errors
        if 'static' in message_lower or 'index.html' in message_lower:
            static_dirs = ('static', 'public', 'build', 'dist', 'frontend')
            if any(marker in path_lower for marker in static_dirs):
                score += 1.5
            if any(marker in filename_lower for marker in ('app', 'server', 'main', 'index')):
                score += 2.0

        # API errors
        if 'api' in message_lower and 'api' in path_lower:
            score += 2.0

        return score

    def get_file_content(self, rel_path: str) -> Optional[str]:
        """Read file content by relative path.

        Undecodable bytes are replaced rather than raising.

        Args:
            rel_path: Relative path from project root

        Returns:
            File content or None if not readable
        """
        filepath = os.path.join(self.project_root, rel_path)
        try:
            with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
                return f.read()
        except OSError:
            return None

    def get_project_context(self, max_files: int = 5) -> str:
        """Generate project context string for AI.

        Args:
            max_files: Maximum number of entry point contents to include

        Returns:
            Formatted project context string
        """
        info = self.scan()  # Returns the cached result when already scanned
        shown_entry_points = info.entry_points[:max_files]

        lines = [
            "## PROJECT STRUCTURE",
            f"Type: {info.project_type}",
            f"Language: {info.primary_language.value}",
            f"Frameworks: {', '.join(info.frameworks_detected) or 'None detected'}",
            "",
            "### Entry Points:",
        ]
        lines.extend(f"- {ep.path}" for ep in shown_entry_points)

        lines.append("")
        lines.append("### Key Files:")

        # Include content of entry points; unreadable or empty files are omitted
        for ep in shown_entry_points:
            content = self.get_file_content(ep.path)
            if not content:
                continue
            lines.append(f"\n#### {ep.path}")
            lines.append("```" + ep.language.value)
            # Limit content length
            if len(content) > self._CONTEXT_SNIPPET_CHARS:
                content = content[:self._CONTEXT_SNIPPET_CHARS] + "\n... (truncated)"
            lines.append(content)
            lines.append("```")

        return "\n".join(lines)
@@ -0,0 +1,5 @@
1
+ """Utility modules for ROMA Debug."""
2
+
3
+ from roma_debug.utils.context import get_file_context
4
+
5
+ __all__ = ["get_file_context"]