code-compass-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,475 @@
+ """Code query engine for analyzing source files."""
+
+ import os
+ import re
+ from pathlib import Path
+ from typing import Dict, List, Any, Optional
+
+
+ class CopilotQuery:
+     """Query interface for code analysis."""
+
+     def __init__(self, repo_path: str = "."):
+         """Initialize the query engine.
+
+         Args:
+             repo_path: Path to the repository to analyze
+         """
+         self.repo_path = Path(repo_path)
+         self.history: List[Dict[str, Any]] = []
+         self._supported_extensions = (
+             ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h",
+             ".php", ".rb", ".go", ".rs", ".sql", ".swift", ".kt"
+         )
+
+     def execute(self, query: str, context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+         """Execute a query against the codebase.
+
+         Args:
+             query: Natural language query string
+             context: Optional context about the codebase
+
+         Returns:
+             Dictionary containing query results
+         """
+         query_lower = query.lower()
+         results = []
+
+         # Route to appropriate search method
+         if any(word in query_lower for word in ["method", "function", "class", "attribute", "define"]):
+             results = self._definition_query(query)
+         elif any(word in query_lower for word in ["import", "depend", "require"]):
+             results = self._import_query(query)
+         elif any(word in query_lower for word in ["find", "search", "where", "locate"]):
+             results = self._search_query(query)
+         else:
+             # General query - search source files for keywords
+             results = self._general_query(query)
+
+         result_entry = {
+             "query": query,
+             "context": context or {},
+             "results": results,
+             "summary": self._generate_summary(results),
+         }
+         self.history.append(result_entry)
+         return result_entry
+
+     def _general_query(self, query: str) -> List[Dict[str, Any]]:
+         """Handle general questions by searching source code.
+
+         Args:
+             query: Query string
+
+         Returns:
+             List of relevant code files
+         """
+         keywords = self._extract_keywords(query)
+         if not keywords:
+             return []
+
+         results = []
+         # Search for matching files by name and content
+         for keyword in keywords:
+             file_matches = self._search_files_by_name(keyword)
+             content_matches = self._search_files_by_content(keyword, limit_per_keyword=2)
+
+             for match in file_matches:
+                 if match not in results:
+                     results.append(match)
+
+             for match in content_matches:
+                 if match not in results:
+                     results.append(match)
+
+         return results[:10]  # Limit total results
+
+     def _search_files_by_name(self, keyword: str) -> List[Dict[str, Any]]:
+         """Search for files matching keyword in filename.
+
+         Args:
+             keyword: Search keyword
+
+         Returns:
+             List of matching files with content
+         """
+         matches = []
+         keyword_lower = keyword.lower()
+
+         for root, _, files in os.walk(self.repo_path):
+             if self._should_skip_dir(root):
+                 continue
+
+             for filename in files:
+                 if filename.lower().endswith(self._supported_extensions):
+                     # Check if keyword matches filename
+                     if keyword_lower in filename.lower():
+                         filepath = Path(root) / filename
+                         rel_path = filepath.relative_to(self.repo_path)
+
+                         try:
+                             with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
+                                 content = f.read()
+                                 matches.append({
+                                     "file": str(rel_path),
+                                     "keyword": keyword,
+                                     "type": "filename_match",
+                                     "content": self._extract_relevant_content(content, keyword, lines=30),
+                                     "match_type": "File name contains keyword",
+                                 })
+                         except (IOError, OSError):
+                             continue
+
+         return matches
+
+     def _search_files_by_content(self, keyword: str, limit_per_keyword: int = 2) -> List[Dict[str, Any]]:
+         """Search for content matching keyword in source files.
+
+         Args:
+             keyword: Search keyword
+             limit_per_keyword: Max files to return per keyword
+
+         Returns:
+             List of matching files with relevant content
+         """
+         matches = []
+         keyword_lower = keyword.lower()
+         found_count = 0
+
+         for root, _, files in os.walk(self.repo_path):
+             if self._should_skip_dir(root):
+                 continue
+
+             if found_count >= limit_per_keyword:
+                 break
+
+             for filename in files:
+                 if found_count >= limit_per_keyword:
+                     break
+
+                 if filename.lower().endswith(self._supported_extensions):
+                     filepath = Path(root) / filename
+                     rel_path = filepath.relative_to(self.repo_path)
+
+                     try:
+                         with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
+                             content = f.read()
+                             # Check whether the keyword appears in the file content
+                             if keyword_lower in content.lower():
+                                 relevant_content = self._extract_relevant_content(content, keyword, lines=25)
+                                 if relevant_content:
+                                     matches.append({
+                                         "file": str(rel_path),
+                                         "keyword": keyword,
+                                         "type": "content_match",
+                                         "content": relevant_content,
+                                         "match_type": "Content contains keyword",
+                                     })
+                                     found_count += 1
+                     except (IOError, OSError):
+                         continue
+
+         return matches
+
+     def _search_query(self, query: str) -> List[Dict[str, Any]]:
+         """Search for keywords in source code.
+
+         Args:
+             query: Search query string
+
+         Returns:
+             List of matching files and locations
+         """
+         keywords = self._extract_keywords(query)
+         matches = []
+
+         for keyword in keywords:
+             for root, _, files in os.walk(self.repo_path):
+                 if self._should_skip_dir(root):
+                     continue
+
+                 for filename in files:
+                     if filename.lower().endswith(self._supported_extensions):
+                         filepath = Path(root) / filename
+                         rel_path = filepath.relative_to(self.repo_path)
+
+                         try:
+                             with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
+                                 content = f.read()
+                                 if keyword.lower() in content.lower():
+                                     lines = self._find_keyword_lines(content, keyword)
+                                     if lines:
+                                         matches.append({
+                                             "file": str(rel_path),
+                                             "keyword": keyword,
+                                             "lines": lines,
+                                             "match_type": "Keyword found in code",
+                                         })
+                                         break
+                         except (IOError, OSError):
+                             continue
+
+         return matches[:15]  # Limit results
+
+     def _definition_query(self, query: str) -> List[Dict[str, Any]]:
+         """Search for function/class definitions.
+
+         Args:
+             query: Definition query string
+
+         Returns:
+             List of definitions found
+         """
+         keywords = self._extract_keywords(query)
+         definitions = []
+
+         for keyword in keywords:
+             for root, _, files in os.walk(self.repo_path):
+                 if self._should_skip_dir(root):
+                     continue
+
+                 for filename in files:
+                     if filename.lower().endswith(self._supported_extensions):
+                         filepath = Path(root) / filename
+                         rel_path = filepath.relative_to(self.repo_path)
+
+                         try:
+                             with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
+                                 content = f.read()
+                                 defs = self._find_definitions(content, keyword, filename)
+                                 if defs:
+                                     definitions.append({
+                                         "file": str(rel_path),
+                                         "keyword": keyword,
+                                         "definitions": defs,
+                                         "match_type": "Function/Class definition",
+                                     })
+                         except (IOError, OSError):
+                             continue
+
+         return definitions[:10]
+
+     def _import_query(self, query: str) -> List[Dict[str, Any]]:
+         """Search for imports and dependencies.
+
+         Args:
+             query: Import query string
+
+         Returns:
+             List of imports found
+         """
+         keywords = self._extract_keywords(query)
+         imports = []
+
+         for root, _, files in os.walk(self.repo_path):
+             if self._should_skip_dir(root):
+                 continue
+
+             for filename in files:
+                 if filename.lower().endswith(self._supported_extensions):
+                     filepath = Path(root) / filename
+                     rel_path = filepath.relative_to(self.repo_path)
+
+                     try:
+                         with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
+                             for line_num, line in enumerate(f, 1):
+                                 if "import" in line.lower() or "require" in line.lower():
+                                     for keyword in keywords:
+                                         if keyword.lower() in line.lower():
+                                             imports.append({
+                                                 "file": str(rel_path),
+                                                 "line": line_num,
+                                                 "content": line.strip(),
+                                                 "match_type": "Import/Require statement",
+                                             })
+                                             break
+                     except (IOError, OSError):
+                         continue
+
+         return imports[:20]
+
+     def _should_skip_dir(self, path: str) -> bool:
+         """Check if directory should be skipped.
+
+         Args:
+             path: Directory path
+
+         Returns:
+             True if should skip, False otherwise
+         """
+         skip_patterns = {"venv", ".git", "__pycache__", "node_modules", ".egg-info", "dist", "build", "vendor"}
+         for pattern in skip_patterns:
+             if pattern in path:
+                 return True
+         return False
+
+     def _extract_relevant_content(self, content: str, keyword: str, lines: int = 20) -> str:
+         """Extract relevant content around keyword.
+
+         Args:
+             content: File content
+             keyword: Search keyword
+             lines: Number of lines to include
+
+         Returns:
+             Relevant content snippet
+         """
+         all_lines = content.split("\n")
+         keyword_lower = keyword.lower()
+
+         for i, line in enumerate(all_lines):
+             if keyword_lower in line.lower():
+                 start = max(0, i - 2)
+                 end = min(len(all_lines), i + lines)
+                 snippet = "\n".join(all_lines[start:end])
+                 return snippet[:1000]  # Limit to 1000 chars
+
+         return ""
+
+     def _extract_keywords(self, query: str) -> List[str]:
+         """Extract search keywords from query.
+
+         Args:
+             query: Query string
+
+         Returns:
+             List of keywords
+         """
+         stopwords = {
+             "find", "search", "where", "locate", "the", "a", "an", "in", "for",
+             "define", "what", "how", "why", "function", "class", "import", "depend",
+             "all", "this", "does", "is", "are", "project", "code", "do", "can",
+             "will", "should", "by", "and", "or", "not", "to", "of", "with", "from",
+             "describe", "explain", "purpose", "method", "file", "files", "require",
+             "attribute", "check", "show", "tell", "list", "get", "work",
+             "done", "doing", "on", "at", "it", "its", "have", "has"
+         }
+         words = query.split()
+         keywords = [w.strip("'\".,!?;:") for w in words
+                     if w.lower() not in stopwords and len(w.strip("'\".,!?;:")) > 2]
+         return keywords
+
+     def _find_keyword_lines(self, content: str, keyword: str, context_lines: int = 3) -> List[Dict[str, Any]]:
+         """Find lines containing keyword with context.
+
+         Args:
+             content: File content
+             keyword: Keyword to search for
+             context_lines: Number of context lines to include
+
+         Returns:
+             List of matching lines with context
+         """
+         lines = content.split("\n")
+         matches = []
+         keyword_lower = keyword.lower()
+
+         for i, line in enumerate(lines):
+             if keyword_lower in line.lower():
+                 start = max(0, i - context_lines)
+                 end = min(len(lines), i + context_lines + 1)
+                 matches.append({
+                     "line_num": i + 1,
+                     "content": line.strip(),
+                     "context": "\n".join(lines[start:end]),
+                 })
+                 if len(matches) >= 3:
+                     break
+
+         return matches
+
+     def _find_definitions(self, content: str, keyword: str, filename: str) -> List[Dict[str, Any]]:
+         """Find function/class definitions matching keyword.
+
+         Args:
+             content: File content
+             keyword: Keyword to search for
+             filename: Name of the file
+
+         Returns:
+             List of definitions found
+         """
+         definitions = []
+         lines = content.split("\n")
+         keyword_esc = re.escape(keyword)
+
+         # Build patterns based on file type
+         if filename.endswith(".py"):
+             patterns = [
+                 (r"^\s*def\s+(\w*" + keyword_esc + r"\w*)\s*\(", "function"),
+                 (r"^\s*class\s+(\w*" + keyword_esc + r"\w*)\s*[\(:]", "class"),
+                 (r"^\s*async\s+def\s+(\w*" + keyword_esc + r"\w*)\s*\(", "async_function"),
+             ]
+         elif filename.endswith((".php", ".java", ".cpp", ".c", ".swift", ".kt")):
+             patterns = [
+                 (r"(?:(?:public|private|protected|static)\s+)?(?:function|void|int|string|bool|class|interface|struct)\s+(\w*" + keyword_esc + r"\w*)\s*[\({]", "function/class"),
+             ]
+         elif filename.endswith((".js", ".ts", ".jsx", ".tsx")):
+             patterns = [
+                 (r"(?:function|const|let|var)\s+(\w*" + keyword_esc + r"\w*)\s*[=\(]", "function"),
+                 (r"class\s+(\w*" + keyword_esc + r"\w*)\s*[{]", "class"),
+             ]
+         else:
+             patterns = [
+                 (r"(?:function|def|class)\s+(\w*" + keyword_esc + r"\w*)", "definition"),
+             ]
+
+         for i, line in enumerate(lines):
+             for pattern, def_type in patterns:
+                 match = re.search(pattern, line, re.IGNORECASE)
+                 if match:
+                     # Get function/class body (next few lines)
+                     body_start = i + 1
+                     body_end = min(len(lines), i + 10)
+                     body = "\n".join(lines[body_start:body_end])
+
+                     definitions.append({
+                         "line_num": i + 1,
+                         "type": def_type,
+                         "name": match.group(1),
+                         "code": line.strip(),
+                         "body": body[:500],  # First 500 chars of body
+                     })
+
+         return definitions
+
+     def _generate_summary(self, results: List[Dict[str, Any]]) -> str:
+         """Generate a summary of results.
+
+         Args:
+             results: List of search results
+
+         Returns:
+             Summary string
+         """
+         if not results:
+             return "No matching code found."
+
+         file_count = len(results)
+         total_matches = sum(
+             len(r.get("lines", [])) + len(r.get("definitions", []))
+             for r in results if r.get("lines") or r.get("definitions")
+         )
+
+         if any(r.get("type") == "filename_match" for r in results):
+             return f"Found {file_count} file(s) matching the query keywords."
+
+         if any(r.get("type") == "content_match" for r in results):
+             return f"Found {file_count} file(s) with relevant code content."
+
+         if total_matches > 0:
+             return f"Found {file_count} file(s) with {total_matches} match(es) in code."
+
+         return f"Found {file_count} relevant file(s)."
+
+     def get_history(self) -> List[Dict[str, Any]]:
+         """Get query history.
+
+         Returns:
+             List of past queries and results
+         """
+         return self.history
+
+     def clear_history(self) -> None:
+         """Clear query history."""
+         self.history = []
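
For orientation, here is a minimal usage sketch of the CopilotQuery class above. It is editorial illustration, not part of the package diff; the import path code_compass.query is an assumption, since the diff does not show module file names.

from code_compass.query import CopilotQuery  # hypothetical import path

engine = CopilotQuery(repo_path=".")

# "class" is one of the definition keywords checked first by execute(),
# so this query is routed to _definition_query(); queries mentioning
# "import"/"depend"/"require" go to _import_query(), and
# "find"/"search"/"where"/"locate" go to _search_query().
result = engine.execute("where is the Scanner class defined?")

print(result["summary"])          # e.g. "Found 1 file(s) with 1 match(es) in code."
for hit in result["results"]:
    print(hit["file"], "-", hit["match_type"])

print(len(engine.get_history()))  # 1; every execute() call is recorded
engine.clear_history()
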
@@ -0,0 +1 @@
+ """Repository scanning module."""
@@ -0,0 +1,139 @@
+ """Repository scanner for analyzing code structure."""
+
+ import os
+ from pathlib import Path
+ from typing import List, Dict, Any, Optional
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class TreeNode:
+     """Represents a node in the directory tree."""
+     name: str
+     is_dir: bool
+     children: Optional[List['TreeNode']] = None
+
+     def __post_init__(self):
+         if self.children is None:
+             self.children = []
+
+
+ class RepoScanner:
+     """Scans a repository to extract code structure and metadata."""
+
+     # Files/dirs to exclude from scan
+     EXCLUDE_PATTERNS = {
+         'venv', '.git', '__pycache__', '.pytest_cache', '.egg-info',
+         'node_modules', '.env', '.venv', 'dist', 'build', '.DS_Store',
+         '*.pyc', '.coverage', '.mypy_cache', 'htmlcov'
+     }
+
+     def __init__(self, repo_path: str = '.'):
+         """Initialize the repository scanner.
+
+         Args:
+             repo_path: Path to the repository to scan
+         """
+         self.repo_path = Path(repo_path)
+
+     def scan(self) -> Dict[str, Any]:
+         """Scan the repository and return structure information.
+
+         Returns:
+             Dictionary containing repository structure and metadata
+         """
+         if not self.repo_path.exists():
+             raise FileNotFoundError(f"Repository path not found: {self.repo_path}")
+
+         return {
+             "path": str(self.repo_path),
+             "tree": self._build_tree(),
+             "files": self._collect_files(),
+             "directories": self._collect_directories(),
+         }
+
+     def _should_exclude(self, path: str) -> bool:
+         """Check if a path should be excluded from scan."""
+         for pattern in self.EXCLUDE_PATTERNS:
+             if pattern.replace('*', '') in path:
+                 return True
+         return False
+
+     def _build_tree(self, start_path: Optional[Path] = None) -> TreeNode:
+         """Build a tree structure of the repository.
+
+         Args:
+             start_path: Starting path for tree building (defaults to repo_path)
+
+         Returns:
+             Root TreeNode of the directory tree
+         """
+         if start_path is None:
+             start_path = self.repo_path
+
+         root = TreeNode(name=start_path.name or str(start_path), is_dir=True)
+         self._populate_tree(start_path, root)
+         return root
+
+     def _populate_tree(self, current_path: Path, node: TreeNode, depth: int = 0) -> None:
+         """Recursively populate tree with directory/file entries.
+
+         Args:
+             current_path: Current directory path
+             node: Current tree node
+             depth: Current recursion depth (limit to 10 to prevent infinite loops)
+         """
+         if depth > 10:
+             return
+
+         try:
+             entries = sorted(current_path.iterdir(), key=lambda x: (not x.is_dir(), x.name))
+         except PermissionError:
+             return
+
+         for entry in entries:
+             if self._should_exclude(entry.name):
+                 continue
+
+             child = TreeNode(name=entry.name, is_dir=entry.is_dir())
+
+             if entry.is_dir():
+                 self._populate_tree(entry, child, depth + 1)
+
+             node.children.append(child)
+
+     def _collect_files(self) -> List[str]:
+         """Collect all files in the repository.
+
+         Returns:
+             List of file paths
+         """
+         files = []
+         for root, _, filenames in os.walk(self.repo_path):
+             if self._should_exclude(root):
+                 continue
+             for filename in filenames:
+                 if self._should_exclude(filename):
+                     continue
+                 file_path = os.path.join(root, filename)
+                 rel_path = os.path.relpath(file_path, self.repo_path)
+                 files.append(rel_path)
+         return files
+
+     def _collect_directories(self) -> List[str]:
+         """Collect all directories in the repository.
+
+         Returns:
+             List of directory paths
+         """
+         directories = []
+         for root, dirnames, _ in os.walk(self.repo_path):
+             if self._should_exclude(root):
+                 continue
+             for dirname in dirnames:
+                 if self._should_exclude(dirname):
+                     continue
+                 dir_path = os.path.join(root, dirname)
+                 rel_path = os.path.relpath(dir_path, self.repo_path)
+                 directories.append(rel_path)
+         return directories
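
Likewise, a minimal usage sketch for the RepoScanner and TreeNode classes above; this is editorial illustration only, and the import path is again an assumption rather than something shown in the diff.

from code_compass.scanner import RepoScanner  # hypothetical import path

scanner = RepoScanner(repo_path=".")
info = scanner.scan()  # raises FileNotFoundError if the path does not exist

print(info["path"])
print(len(info["files"]), "files,", len(info["directories"]), "directories")

# info["tree"] is the root TreeNode; walk it to print the directory layout.
def show(node, indent=0):
    print(" " * indent + node.name + ("/" if node.is_dir else ""))
    for child in node.children:
        show(child, indent + 2)

show(info["tree"])
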
@@ -0,0 +1 @@
+ """Visualization module for code flows and dependencies."""