code-compass-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_compass_cli-0.1.0.dist-info/METADATA +46 -0
- code_compass_cli-0.1.0.dist-info/RECORD +31 -0
- code_compass_cli-0.1.0.dist-info/WHEEL +5 -0
- code_compass_cli-0.1.0.dist-info/entry_points.txt +2 -0
- code_compass_cli-0.1.0.dist-info/top_level.txt +1 -0
- src/__init__.py +3 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/cli/__init__.py +1 -0
- src/cli/__pycache__/__init__.cpython-313.pyc +0 -0
- src/cli/__pycache__/main.cpython-313.pyc +0 -0
- src/cli/main.py +431 -0
- src/docs/__init__.py +1 -0
- src/docs/__pycache__/__init__.cpython-313.pyc +0 -0
- src/docs/__pycache__/doc_generator.cpython-313.pyc +0 -0
- src/docs/doc_generator.py +734 -0
- src/quality/__init__.py +1 -0
- src/quality/__pycache__/__init__.cpython-313.pyc +0 -0
- src/quality/__pycache__/analyzer.cpython-313.pyc +0 -0
- src/quality/analyzer.py +300 -0
- src/query/__init__.py +1 -0
- src/query/__pycache__/__init__.cpython-313.pyc +0 -0
- src/query/__pycache__/copilot_query.cpython-313.pyc +0 -0
- src/query/copilot_query.py +475 -0
- src/scanner/__init__.py +1 -0
- src/scanner/__pycache__/__init__.cpython-313.pyc +0 -0
- src/scanner/__pycache__/repo_scanner.cpython-313.pyc +0 -0
- src/scanner/repo_scanner.py +139 -0
- src/visualizer/__init__.py +1 -0
- src/visualizer/__pycache__/__init__.cpython-313.pyc +0 -0
- src/visualizer/__pycache__/flow_tracer.cpython-313.pyc +0 -0
- src/visualizer/flow_tracer.py +294 -0
src/query/copilot_query.py
ADDED
@@ -0,0 +1,475 @@
"""Code query engine for analyzing source files."""

import os
import re
from pathlib import Path
from typing import Dict, List, Any, Optional


class CopilotQuery:
    """Query interface for code analysis."""

    def __init__(self, repo_path: str = "."):
        """Initialize the query engine.

        Args:
            repo_path: Path to the repository to analyze
        """
        self.repo_path = Path(repo_path)
        self.history: List[Dict[str, Any]] = []
        self._supported_extensions = (
            ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
            ".php", ".rb", ".go", ".rs", ".sql", ".swift", ".kt"
        )

    def execute(self, query: str, context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Execute a query against the codebase.

        Args:
            query: Natural language query string
            context: Optional context about the codebase

        Returns:
            Dictionary containing query results
        """
        query_lower = query.lower()
        results = []

        # Route to appropriate search method
        if any(word in query_lower for word in ["method", "function", "class", "attribute", "define"]):
            results = self._definition_query(query)
        elif any(word in query_lower for word in ["import", "depend", "require"]):
            results = self._import_query(query)
        elif any(word in query_lower for word in ["find", "search", "where", "locate"]):
            results = self._search_query(query)
        else:
            # General query - search source files for keywords
            results = self._general_query(query)

        result_entry = {
            "query": query,
            "context": context or {},
            "results": results,
            "summary": self._generate_summary(results),
        }
        self.history.append(result_entry)
        return result_entry

    def _general_query(self, query: str) -> List[Dict[str, Any]]:
        """Handle general questions by searching source code.

        Args:
            query: Query string

        Returns:
            List of relevant code files
        """
        keywords = self._extract_keywords(query)
        if not keywords:
            return []

        results = []
        # Search for matching files by name and content
        for keyword in keywords:
            file_matches = self._search_files_by_name(keyword)
            content_matches = self._search_files_by_content(keyword, limit_per_keyword=2)

            for match in file_matches:
                if match not in results:
                    results.append(match)

            for match in content_matches:
                if match not in results:
                    results.append(match)

        return results[:10]  # Limit total results

    def _search_files_by_name(self, keyword: str) -> List[Dict[str, Any]]:
        """Search for files matching keyword in filename.

        Args:
            keyword: Search keyword

        Returns:
            List of matching files with content
        """
        matches = []
        keyword_lower = keyword.lower()

        for root, _, files in os.walk(self.repo_path):
            if self._should_skip_dir(root):
                continue

            for filename in files:
                if filename.lower().endswith(self._supported_extensions):
                    # Check if keyword matches filename
                    if keyword_lower in filename.lower():
                        filepath = Path(root) / filename
                        rel_path = filepath.relative_to(self.repo_path)

                        try:
                            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                                content = f.read()
                                matches.append({
                                    "file": str(rel_path),
                                    "keyword": keyword,
                                    "type": "filename_match",
                                    "content": self._extract_relevant_content(content, keyword, lines=30),
                                    "match_type": "File name contains keyword",
                                })
                        except (IOError, OSError):
                            continue

        return matches

    def _search_files_by_content(self, keyword: str, limit_per_keyword: int = 2) -> List[Dict[str, Any]]:
        """Search for content matching keyword in source files.

        Args:
            keyword: Search keyword
            limit_per_keyword: Max files to return per keyword

        Returns:
            List of matching files with relevant content
        """
        matches = []
        keyword_lower = keyword.lower()
        found_count = 0

        for root, _, files in os.walk(self.repo_path):
            if self._should_skip_dir(root):
                continue

            if found_count >= limit_per_keyword:
                break

            for filename in files:
                if found_count >= limit_per_keyword:
                    break

                if filename.lower().endswith(self._supported_extensions):
                    filepath = Path(root) / filename
                    rel_path = filepath.relative_to(self.repo_path)

                    try:
                        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                            content = f.read()
                            # Count matches
                            if keyword_lower in content.lower():
                                relevant_content = self._extract_relevant_content(content, keyword, lines=25)
                                if relevant_content:
                                    matches.append({
                                        "file": str(rel_path),
                                        "keyword": keyword,
                                        "type": "content_match",
                                        "content": relevant_content,
                                        "match_type": "Content contains keyword",
                                    })
                                    found_count += 1
                    except (IOError, OSError):
                        continue

        return matches

    def _search_query(self, query: str) -> List[Dict[str, Any]]:
        """Search for keywords in source code.

        Args:
            query: Search query string

        Returns:
            List of matching files and locations
        """
        keywords = self._extract_keywords(query)
        matches = []

        for keyword in keywords:
            for root, _, files in os.walk(self.repo_path):
                if self._should_skip_dir(root):
                    continue

                for filename in files:
                    if filename.lower().endswith(self._supported_extensions):
                        filepath = Path(root) / filename
                        rel_path = filepath.relative_to(self.repo_path)

                        try:
                            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                                content = f.read()
                                if keyword.lower() in content.lower():
                                    lines = self._find_keyword_lines(content, keyword)
                                    if lines:
                                        matches.append({
                                            "file": str(rel_path),
                                            "keyword": keyword,
                                            "lines": lines,
                                            "match_type": "Keyword found in code",
                                        })
                                        break
                        except (IOError, OSError):
                            continue

        return matches[:15]  # Limit results

    def _definition_query(self, query: str) -> List[Dict[str, Any]]:
        """Search for function/class definitions.

        Args:
            query: Definition query string

        Returns:
            List of definitions found
        """
        keywords = self._extract_keywords(query)
        definitions = []

        for keyword in keywords:
            for root, _, files in os.walk(self.repo_path):
                if self._should_skip_dir(root):
                    continue

                for filename in files:
                    if filename.lower().endswith(self._supported_extensions):
                        filepath = Path(root) / filename
                        rel_path = filepath.relative_to(self.repo_path)

                        try:
                            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                                content = f.read()
                                defs = self._find_definitions(content, keyword, filename)
                                if defs:
                                    definitions.append({
                                        "file": str(rel_path),
                                        "keyword": keyword,
                                        "definitions": defs,
                                        "match_type": "Function/Class definition",
                                    })
                        except (IOError, OSError):
                            continue

        return definitions[:10]

    def _import_query(self, query: str) -> List[Dict[str, Any]]:
        """Search for imports and dependencies.

        Args:
            query: Import query string

        Returns:
            List of imports found
        """
        keywords = self._extract_keywords(query)
        imports = []

        for root, _, files in os.walk(self.repo_path):
            if self._should_skip_dir(root):
                continue

            for filename in files:
                if filename.lower().endswith(self._supported_extensions):
                    filepath = Path(root) / filename
                    rel_path = filepath.relative_to(self.repo_path)

                    try:
                        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                            for line_num, line in enumerate(f, 1):
                                if "import" in line.lower() or "require" in line.lower():
                                    for keyword in keywords:
                                        if keyword.lower() in line.lower():
                                            imports.append({
                                                "file": str(rel_path),
                                                "line": line_num,
                                                "content": line.strip(),
                                                "match_type": "Import/Require statement",
                                            })
                                            break
                    except (IOError, OSError):
                        continue

        return imports[:20]

    def _should_skip_dir(self, path: str) -> bool:
        """Check if directory should be skipped.

        Args:
            path: Directory path

        Returns:
            True if should skip, False otherwise
        """
        skip_patterns = {"venv", ".git", "__pycache__", "node_modules", ".egg-info", "dist", "build", "vendor"}
        for pattern in skip_patterns:
            if pattern in path:
                return True
        return False

    def _extract_relevant_content(self, content: str, keyword: str, lines: int = 20) -> str:
        """Extract relevant content around keyword.

        Args:
            content: File content
            keyword: Search keyword
            lines: Number of lines to include

        Returns:
            Relevant content snippet
        """
        all_lines = content.split("\n")
        keyword_lower = keyword.lower()

        for i, line in enumerate(all_lines):
            if keyword_lower in line.lower():
                start = max(0, i - 2)
                end = min(len(all_lines), i + lines)
                snippet = "\n".join(all_lines[start:end])
                return snippet[:1000]  # Limit to 1000 chars

        return ""

    def _extract_keywords(self, query: str) -> List[str]:
        """Extract search keywords from query.

        Args:
            query: Query string

        Returns:
            List of keywords
        """
        stopwords = {
            "find", "search", "where", "locate", "the", "a", "an", "in", "for",
            "define", "what", "how", "why", "function", "class", "import", "depend",
            "all", "this", "does", "is", "are", "project", "code", "do", "can",
            "will", "should", "by", "and", "or", "not", "to", "of", "with", "from",
            "describe", "explain", "purpose", "method", "file", "files", "require",
            "attribute", "check", "show", "tell", "list", "get", "work", "work",
            "do", "does", "done", "doing", "on", "at", "it", "its", "have", "has"
        }
        words = query.split()
        keywords = [w.strip("'\".,!?;:") for w in words
                    if w.lower() not in stopwords and len(w.strip("'\".,!?;:")) > 2]
        return keywords

    def _find_keyword_lines(self, content: str, keyword: str, context_lines: int = 3) -> List[Dict[str, Any]]:
        """Find lines containing keyword with context.

        Args:
            content: File content
            keyword: Keyword to search for
            context_lines: Number of context lines to include

        Returns:
            List of matching lines with context
        """
        lines = content.split("\n")
        matches = []
        keyword_lower = keyword.lower()

        for i, line in enumerate(lines):
            if keyword_lower in line.lower():
                start = max(0, i - context_lines)
                end = min(len(lines), i + context_lines + 1)
                matches.append({
                    "line_num": i + 1,
                    "content": line.strip(),
                    "context": "\n".join(lines[start:end]),
                })
                if len(matches) >= 3:
                    break

        return matches

    def _find_definitions(self, content: str, keyword: str, filename: str) -> List[Dict[str, Any]]:
        """Find function/class definitions matching keyword.

        Args:
            content: File content
            keyword: Keyword to search for
            filename: Name of the file

        Returns:
            List of definitions found
        """
        definitions = []
        lines = content.split("\n")
        keyword_esc = re.escape(keyword)

        # Build patterns based on file type
        if filename.endswith(".py"):
            patterns = [
                (r"^\s*def\s+(\w*" + keyword_esc + r"\w*)\s*\(", "function"),
                (r"^\s*class\s+(\w*" + keyword_esc + r"\w*)\s*[\(:]", "class"),
                (r"^\s*async\s+def\s+(\w*" + keyword_esc + r"\w*)\s*\(", "async_function"),
            ]
        elif filename.endswith((".php", ".java", ".cpp", ".c", ".swift", ".kt")):
            patterns = [
                (r"(?:public|private|protected|static)?\s+(?:function|void|int|string|bool|class|interface|struct)\s+(\w*" + keyword_esc + r"\w*)\s*[\({\(]", "function/class"),
            ]
        elif filename.endswith((".js", ".ts", ".jsx", ".tsx")):
            patterns = [
                (r"(?:function|const|let|var)\s+(\w*" + keyword_esc + r"\w*)\s*[=\(]", "function"),
                (r"class\s+(\w*" + keyword_esc + r"\w*)\s*[{]", "class"),
            ]
        else:
            patterns = [
                (r"(?:function|def|class)\s+(\w*" + keyword_esc + r"\w*)", "definition"),
            ]

        for i, line in enumerate(lines):
            for pattern, def_type in patterns:
                match = re.search(pattern, line, re.IGNORECASE)
                if match:
                    # Get function/class body (next few lines)
                    body_start = i + 1
                    body_end = min(len(lines), i + 10)
                    body = "\n".join(lines[body_start:body_end])

                    definitions.append({
                        "line_num": i + 1,
                        "type": def_type,
                        "name": match.group(1),
                        "code": line.strip(),
                        "body": body[:500],  # First 500 chars of body
                    })

        return definitions

    def _generate_summary(self, results: List[Dict[str, Any]]) -> str:
        """Generate a summary of results.

        Args:
            results: List of search results

        Returns:
            Summary string
        """
        if not results:
            return "No matching code found."

        file_count = len(results)
        total_matches = sum(
            len(r.get("lines", [])) + len(r.get("definitions", []))
            for r in results if r.get("lines") or r.get("definitions")
        )

        if any(r.get("type") == "filename_match" for r in results):
            return f"Found {file_count} file(s) matching the query keywords."

        if any(r.get("type") == "content_match" for r in results):
            return f"Found {file_count} file(s) with relevant code content."

        if total_matches > 0:
            return f"Found {file_count} file(s) with {total_matches} match(es) in code."

        return f"Found {file_count} relevant file(s)."

    def get_history(self) -> List[Dict[str, Any]]:
        """Get query history.

        Returns:
            List of past queries and results
        """
        return self.history

    def clear_history(self) -> None:
        """Clear query history."""
        self.history = []
src/scanner/__init__.py
ADDED
@@ -0,0 +1 @@
"""Repository scanning module."""

Binary file

Binary file
src/scanner/repo_scanner.py
ADDED
@@ -0,0 +1,139 @@
"""Repository scanner for analyzing code structure."""

import os
from pathlib import Path
from typing import List, Dict, Any, Optional
from dataclasses import dataclass


@dataclass
class TreeNode:
    """Represents a node in the directory tree."""
    name: str
    is_dir: bool
    children: List['TreeNode'] = None

    def __post_init__(self):
        if self.children is None:
            self.children = []


class RepoScanner:
    """Scans a repository to extract code structure and metadata."""

    # Files/dirs to exclude from scan
    EXCLUDE_PATTERNS = {
        'venv', '.git', '__pycache__', '.pytest_cache', '.egg-info',
        'node_modules', '.env', '.venv', 'dist', 'build', '.DS_Store',
        '*.pyc', '.coverage', '.mypy_cache', 'htmlcov'
    }

    def __init__(self, repo_path: str = '.'):
        """Initialize the repository scanner.

        Args:
            repo_path: Path to the repository to scan
        """
        self.repo_path = Path(repo_path)

    def scan(self) -> Dict[str, Any]:
        """Scan the repository and return structure information.

        Returns:
            Dictionary containing repository structure and metadata
        """
        if not self.repo_path.exists():
            raise FileNotFoundError(f"Repository path not found: {self.repo_path}")

        return {
            "path": str(self.repo_path),
            "tree": self._build_tree(),
            "files": self._collect_files(),
            "directories": self._collect_directories(),
        }

    def _should_exclude(self, path: str) -> bool:
        """Check if a path should be excluded from scan."""
        for pattern in self.EXCLUDE_PATTERNS:
            if pattern.replace('*', '') in path:
                return True
        return False

    def _build_tree(self, start_path: Optional[Path] = None) -> TreeNode:
        """Build a tree structure of the repository.

        Args:
            start_path: Starting path for tree building (defaults to repo_path)

        Returns:
            Root TreeNode of the directory tree
        """
        if start_path is None:
            start_path = self.repo_path

        root = TreeNode(name=start_path.name or str(start_path), is_dir=True)
        self._populate_tree(start_path, root)
        return root

    def _populate_tree(self, current_path: Path, node: TreeNode, depth: int = 0) -> None:
        """Recursively populate tree with directory/file entries.

        Args:
            current_path: Current directory path
            node: Current tree node
            depth: Current recursion depth (limit to 10 to prevent infinite loops)
        """
        if depth > 10:
            return

        try:
            entries = sorted(current_path.iterdir(), key=lambda x: (not x.is_dir(), x.name))
        except PermissionError:
            return

        for entry in entries:
            if self._should_exclude(entry.name):
                continue

            child = TreeNode(name=entry.name, is_dir=entry.is_dir())

            if entry.is_dir():
                self._populate_tree(entry, child, depth + 1)

            node.children.append(child)

    def _collect_files(self) -> List[str]:
        """Collect all files in the repository.

        Returns:
            List of file paths
        """
        files = []
        for root, _, filenames in os.walk(self.repo_path):
            if self._should_exclude(root):
                continue
            for filename in filenames:
                if self._should_exclude(filename):
                    continue
                file_path = os.path.join(root, filename)
                rel_path = os.path.relpath(file_path, self.repo_path)
                files.append(rel_path)
        return files

    def _collect_directories(self) -> List[str]:
        """Collect all directories in the repository.

        Returns:
            List of directory paths
        """
        directories = []
        for root, dirnames, _ in os.walk(self.repo_path):
            if self._should_exclude(root):
                continue
            for dirname in dirnames:
                if self._should_exclude(dirname):
                    continue
                dir_path = os.path.join(root, dirname)
                rel_path = os.path.relpath(dir_path, self.repo_path)
                directories.append(rel_path)
        return directories
src/visualizer/__init__.py
ADDED
@@ -0,0 +1 @@
"""Visualization module for code flows and dependencies."""

Binary file

Binary file