mcp-code-indexer 3.1.4__py3-none-any.whl → 3.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/__init__.py +8 -6
- mcp_code_indexer/ask_handler.py +105 -75
- mcp_code_indexer/claude_api_handler.py +125 -82
- mcp_code_indexer/cleanup_manager.py +107 -81
- mcp_code_indexer/database/connection_health.py +212 -161
- mcp_code_indexer/database/database.py +529 -415
- mcp_code_indexer/database/exceptions.py +167 -118
- mcp_code_indexer/database/models.py +54 -19
- mcp_code_indexer/database/retry_executor.py +139 -103
- mcp_code_indexer/deepask_handler.py +178 -140
- mcp_code_indexer/error_handler.py +88 -76
- mcp_code_indexer/file_scanner.py +163 -141
- mcp_code_indexer/git_hook_handler.py +352 -261
- mcp_code_indexer/logging_config.py +76 -94
- mcp_code_indexer/main.py +406 -320
- mcp_code_indexer/middleware/error_middleware.py +106 -71
- mcp_code_indexer/query_preprocessor.py +40 -40
- mcp_code_indexer/server/mcp_server.py +785 -470
- mcp_code_indexer/token_counter.py +54 -47
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/METADATA +3 -3
- mcp_code_indexer-3.1.5.dist-info/RECORD +37 -0
- mcp_code_indexer-3.1.4.dist-info/RECORD +0 -37
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/top_level.txt +0 -0
mcp_code_indexer/file_scanner.py
CHANGED
@@ -22,131 +22,151 @@ logger = logging.getLogger(__name__)
|
|
22
22
|
# Default patterns to ignore even without .gitignore
|
23
23
|
DEFAULT_IGNORE_PATTERNS = [
|
24
24
|
# Version control
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
".git/",
|
26
|
+
".svn/",
|
27
|
+
".hg/",
|
29
28
|
# Dependencies and packages
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
29
|
+
"node_modules/",
|
30
|
+
"venv/",
|
31
|
+
".venv/",
|
32
|
+
"env/",
|
33
|
+
".env/",
|
34
|
+
"__pycache__/",
|
35
|
+
"*.pyc",
|
36
|
+
"*.pyo",
|
37
|
+
"*.pyd",
|
38
|
+
".Python",
|
41
39
|
# Build artifacts
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
40
|
+
"build/",
|
41
|
+
"dist/",
|
42
|
+
"target/",
|
43
|
+
"out/",
|
44
|
+
"bin/",
|
45
|
+
"obj/",
|
46
|
+
"*.o",
|
47
|
+
"*.so",
|
48
|
+
"*.dylib",
|
49
|
+
"*.dll",
|
50
|
+
"*.exe",
|
54
51
|
# IDE and editor files
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
52
|
+
".vscode/",
|
53
|
+
".idea/",
|
54
|
+
".vs/",
|
55
|
+
"*.swp",
|
56
|
+
"*.swo",
|
57
|
+
"*~",
|
58
|
+
".DS_Store",
|
59
|
+
"Thumbs.db",
|
64
60
|
# Testing and coverage
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
61
|
+
"coverage/",
|
62
|
+
"htmlcov/",
|
63
|
+
".pytest_cache/",
|
64
|
+
".coverage",
|
65
|
+
"*.coverage",
|
71
66
|
# Documentation builds
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
67
|
+
"_build/",
|
68
|
+
"docs/_build/",
|
69
|
+
"site/",
|
76
70
|
# Logs and temporary files
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
71
|
+
"*.log",
|
72
|
+
"*.tmp",
|
73
|
+
"*.temp",
|
74
|
+
"*.cache",
|
82
75
|
# Package files
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
76
|
+
"*.tar.gz",
|
77
|
+
"*.zip",
|
78
|
+
"*.rar",
|
79
|
+
"*.7z",
|
88
80
|
# Lock files
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
81
|
+
"package-lock.json",
|
82
|
+
"yarn.lock",
|
83
|
+
"Pipfile.lock",
|
84
|
+
"poetry.lock",
|
93
85
|
]
|
94
86
|
|
95
87
|
# File extensions commonly ignored for code indexing
|
96
88
|
IGNORED_EXTENSIONS = {
|
97
89
|
# Binary files
|
98
|
-
|
99
|
-
|
90
|
+
".exe",
|
91
|
+
".dll",
|
92
|
+
".so",
|
93
|
+
".dylib",
|
94
|
+
".bin",
|
95
|
+
".o",
|
96
|
+
".obj",
|
100
97
|
# Images
|
101
|
-
|
102
|
-
|
98
|
+
".png",
|
99
|
+
".jpg",
|
100
|
+
".jpeg",
|
101
|
+
".gif",
|
102
|
+
".bmp",
|
103
|
+
".ico",
|
104
|
+
".svg",
|
103
105
|
# Documents
|
104
|
-
|
105
|
-
|
106
|
+
".pdf",
|
107
|
+
".doc",
|
108
|
+
".docx",
|
109
|
+
".xls",
|
110
|
+
".xlsx",
|
111
|
+
".ppt",
|
112
|
+
".pptx",
|
106
113
|
# Media
|
107
|
-
|
108
|
-
|
114
|
+
".mp3",
|
115
|
+
".mp4",
|
116
|
+
".avi",
|
117
|
+
".mov",
|
118
|
+
".wmv",
|
119
|
+
".flv",
|
109
120
|
# Archives
|
110
|
-
|
111
|
-
|
121
|
+
".zip",
|
122
|
+
".tar",
|
123
|
+
".gz",
|
124
|
+
".rar",
|
125
|
+
".7z",
|
112
126
|
# Fonts
|
113
|
-
|
127
|
+
".ttf",
|
128
|
+
".otf",
|
129
|
+
".woff",
|
130
|
+
".woff2",
|
131
|
+
".eot",
|
114
132
|
}
|
115
133
|
|
116
134
|
|
117
135
|
class FileScanner:
|
118
136
|
"""
|
119
137
|
Handles file discovery with gitignore and pattern-based filtering.
|
120
|
-
|
138
|
+
|
121
139
|
Provides methods to scan directories while respecting .gitignore files
|
122
140
|
and default ignore patterns to identify files suitable for description tracking.
|
123
141
|
"""
|
124
|
-
|
142
|
+
|
125
143
|
def __init__(self, project_root: Path):
|
126
144
|
"""
|
127
145
|
Initialize file scanner for a project.
|
128
|
-
|
146
|
+
|
129
147
|
Args:
|
130
148
|
project_root: Root directory of the project to scan
|
131
149
|
"""
|
132
150
|
self.project_root = Path(project_root).resolve()
|
133
151
|
self._gitignore_cache: dict = {}
|
134
152
|
self._load_gitignore_patterns()
|
135
|
-
|
153
|
+
|
136
154
|
def _load_gitignore_patterns(self) -> None:
|
137
155
|
"""Load and cache gitignore patterns from the project."""
|
138
156
|
self._gitignore_cache.clear()
|
139
|
-
|
157
|
+
|
140
158
|
if parse_gitignore is None:
|
141
|
-
logger.warning(
|
159
|
+
logger.warning(
|
160
|
+
"gitignore_parser not available, using default patterns only"
|
161
|
+
)
|
142
162
|
return
|
143
|
-
|
163
|
+
|
144
164
|
# Look for .gitignore files in the project hierarchy
|
145
165
|
current_path = self.project_root
|
146
|
-
|
166
|
+
|
147
167
|
while current_path != current_path.parent:
|
148
|
-
gitignore_path = current_path /
|
149
|
-
|
168
|
+
gitignore_path = current_path / ".gitignore"
|
169
|
+
|
150
170
|
if gitignore_path.exists():
|
151
171
|
try:
|
152
172
|
gitignore_func = parse_gitignore(gitignore_path)
|
@@ -154,14 +174,14 @@ class FileScanner:
|
|
154
174
|
logger.debug(f"Loaded .gitignore from {gitignore_path}")
|
155
175
|
except Exception as e:
|
156
176
|
logger.warning(f"Failed to parse {gitignore_path}: {e}")
|
157
|
-
|
177
|
+
|
158
178
|
current_path = current_path.parent
|
159
|
-
|
179
|
+
|
160
180
|
def _is_ignored_by_gitignore(self, file_path: Path) -> bool:
|
161
181
|
"""Check if a file is ignored by any .gitignore file."""
|
162
182
|
if not self._gitignore_cache:
|
163
183
|
return False
|
164
|
-
|
184
|
+
|
165
185
|
# Check against all loaded .gitignore patterns
|
166
186
|
for base_path, gitignore_func in self._gitignore_cache.items():
|
167
187
|
try:
|
@@ -171,9 +191,9 @@ class FileScanner:
|
|
171
191
|
except Exception as e:
|
172
192
|
logger.debug(f"Error checking gitignore pattern: {e}")
|
173
193
|
continue
|
174
|
-
|
194
|
+
|
175
195
|
return False
|
176
|
-
|
196
|
+
|
177
197
|
def _is_ignored_by_default_patterns(self, file_path: Path) -> bool:
|
178
198
|
"""Check if a file matches default ignore patterns."""
|
179
199
|
try:
|
@@ -183,11 +203,11 @@ class FileScanner:
|
|
183
203
|
rel_path_str = str(rel_path)
|
184
204
|
except ValueError:
|
185
205
|
return True
|
186
|
-
|
206
|
+
|
187
207
|
for pattern in DEFAULT_IGNORE_PATTERNS:
|
188
208
|
# Handle directory patterns
|
189
|
-
if pattern.endswith(
|
190
|
-
pattern_no_slash = pattern.rstrip(
|
209
|
+
if pattern.endswith("/"):
|
210
|
+
pattern_no_slash = pattern.rstrip("/")
|
191
211
|
# Check if any parent directory matches
|
192
212
|
for parent in rel_path.parents:
|
193
213
|
if fnmatch.fnmatch(parent.name, pattern_no_slash):
|
@@ -201,89 +221,89 @@ class FileScanner:
|
|
201
221
|
return True
|
202
222
|
if fnmatch.fnmatch(file_path.name, pattern):
|
203
223
|
return True
|
204
|
-
|
224
|
+
|
205
225
|
return False
|
206
|
-
|
226
|
+
|
207
227
|
def _is_ignored_by_extension(self, file_path: Path) -> bool:
|
208
228
|
"""Check if a file has an ignored extension."""
|
209
229
|
return file_path.suffix.lower() in IGNORED_EXTENSIONS
|
210
|
-
|
230
|
+
|
211
231
|
def should_ignore_file(self, file_path: Path) -> bool:
|
212
232
|
"""
|
213
233
|
Determine if a file should be ignored.
|
214
|
-
|
234
|
+
|
215
235
|
Args:
|
216
236
|
file_path: Path to the file to check
|
217
|
-
|
237
|
+
|
218
238
|
Returns:
|
219
239
|
True if the file should be ignored
|
220
240
|
"""
|
221
241
|
# Check if it's a file (not directory)
|
222
242
|
if not file_path.is_file():
|
223
243
|
return True
|
224
|
-
|
244
|
+
|
225
245
|
# Check file extension
|
226
246
|
if self._is_ignored_by_extension(file_path):
|
227
247
|
return True
|
228
|
-
|
248
|
+
|
229
249
|
# Check default patterns
|
230
250
|
if self._is_ignored_by_default_patterns(file_path):
|
231
251
|
return True
|
232
|
-
|
252
|
+
|
233
253
|
# Check gitignore patterns
|
234
254
|
if self._is_ignored_by_gitignore(file_path):
|
235
255
|
return True
|
236
|
-
|
256
|
+
|
237
257
|
return False
|
238
|
-
|
258
|
+
|
239
259
|
def scan_directory(self, max_files: Optional[int] = None) -> List[Path]:
|
240
260
|
"""
|
241
261
|
Scan the project directory for trackable files.
|
242
|
-
|
262
|
+
|
243
263
|
Args:
|
244
264
|
max_files: Maximum number of files to return (None for no limit)
|
245
|
-
|
265
|
+
|
246
266
|
Returns:
|
247
267
|
List of file paths that should be tracked
|
248
268
|
"""
|
249
269
|
files = []
|
250
|
-
|
270
|
+
|
251
271
|
try:
|
252
272
|
for file_path in self._walk_directory():
|
253
273
|
if not self.should_ignore_file(file_path):
|
254
274
|
files.append(file_path)
|
255
|
-
|
275
|
+
|
256
276
|
if max_files and len(files) >= max_files:
|
257
277
|
logger.info(f"Reached max_files limit of {max_files}")
|
258
278
|
break
|
259
|
-
|
279
|
+
|
260
280
|
except Exception as e:
|
261
281
|
logger.error(f"Error scanning directory {self.project_root}: {e}")
|
262
|
-
|
282
|
+
|
263
283
|
# Sort files for consistent ordering
|
264
284
|
files.sort()
|
265
|
-
|
285
|
+
|
266
286
|
logger.info(f"Found {len(files)} trackable files in {self.project_root}")
|
267
287
|
return files
|
268
|
-
|
288
|
+
|
269
289
|
def _walk_directory(self) -> Generator[Path, None, None]:
|
270
290
|
"""Walk through all files in the project directory."""
|
271
291
|
try:
|
272
|
-
for item in self.project_root.rglob(
|
292
|
+
for item in self.project_root.rglob("*"):
|
273
293
|
if item.is_file():
|
274
294
|
yield item
|
275
295
|
except PermissionError as e:
|
276
296
|
logger.warning(f"Permission denied accessing {e.filename}")
|
277
297
|
except Exception as e:
|
278
298
|
logger.error(f"Error walking directory: {e}")
|
279
|
-
|
299
|
+
|
280
300
|
def get_relative_path(self, file_path: Path) -> str:
|
281
301
|
"""
|
282
302
|
Get relative path from project root.
|
283
|
-
|
303
|
+
|
284
304
|
Args:
|
285
305
|
file_path: Absolute path to file
|
286
|
-
|
306
|
+
|
287
307
|
Returns:
|
288
308
|
Relative path string from project root
|
289
309
|
"""
|
@@ -295,87 +315,89 @@ class FileScanner:
|
|
295
315
|
except ValueError:
|
296
316
|
# File is outside project root, return absolute path
|
297
317
|
return str(file_path)
|
298
|
-
|
318
|
+
|
299
319
|
def find_missing_files(self, existing_paths: Set[str]) -> List[Path]:
|
300
320
|
"""
|
301
321
|
Find files that exist on disk but aren't in the existing paths set.
|
302
|
-
|
322
|
+
|
303
323
|
Args:
|
304
324
|
existing_paths: Set of relative file paths that already have descriptions
|
305
|
-
|
325
|
+
|
306
326
|
Returns:
|
307
327
|
List of file paths that are missing descriptions
|
308
328
|
"""
|
309
329
|
all_files = self.scan_directory()
|
310
330
|
missing_files = []
|
311
|
-
|
331
|
+
|
312
332
|
for file_path in all_files:
|
313
333
|
rel_path = self.get_relative_path(file_path)
|
314
334
|
if rel_path not in existing_paths:
|
315
335
|
missing_files.append(file_path)
|
316
|
-
|
336
|
+
|
317
337
|
logger.info(f"Found {len(missing_files)} files missing descriptions")
|
318
338
|
return missing_files
|
319
|
-
|
339
|
+
|
320
340
|
def is_valid_project_directory(self) -> bool:
|
321
341
|
"""
|
322
342
|
Check if the project root is a valid directory for scanning.
|
323
|
-
|
343
|
+
|
324
344
|
Returns:
|
325
345
|
True if the directory exists and is accessible
|
326
346
|
"""
|
327
347
|
try:
|
328
348
|
return (
|
329
|
-
self.project_root.exists()
|
330
|
-
self.project_root.is_dir()
|
331
|
-
self.project_root.stat().st_mode & 0o444 # Readable
|
349
|
+
self.project_root.exists()
|
350
|
+
and self.project_root.is_dir()
|
351
|
+
and self.project_root.stat().st_mode & 0o444 # Readable
|
332
352
|
)
|
333
353
|
except (OSError, PermissionError):
|
334
354
|
return False
|
335
|
-
|
355
|
+
|
336
356
|
def get_project_stats(self) -> dict:
|
337
357
|
"""
|
338
358
|
Get statistics about the project directory.
|
339
|
-
|
359
|
+
|
340
360
|
Returns:
|
341
361
|
Dictionary with project statistics for trackable files only
|
342
362
|
"""
|
343
363
|
stats = {
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
364
|
+
"total_files": 0,
|
365
|
+
"trackable_files": 0,
|
366
|
+
"ignored_files": 0,
|
367
|
+
"largest_file_size": 0,
|
368
|
+
"file_extensions": {},
|
349
369
|
}
|
350
|
-
|
370
|
+
|
351
371
|
try:
|
352
372
|
all_files_count = 0
|
353
373
|
for file_path in self._walk_directory():
|
354
374
|
all_files_count += 1
|
355
|
-
|
375
|
+
|
356
376
|
# Check if trackable first
|
357
377
|
if self.should_ignore_file(file_path):
|
358
|
-
stats[
|
378
|
+
stats["ignored_files"] += 1
|
359
379
|
continue
|
360
|
-
|
380
|
+
|
361
381
|
# Only process trackable files for detailed stats
|
362
|
-
stats[
|
363
|
-
|
382
|
+
stats["trackable_files"] += 1
|
383
|
+
|
364
384
|
# Track file size
|
365
385
|
try:
|
366
386
|
file_size = file_path.stat().st_size
|
367
|
-
stats[
|
387
|
+
stats["largest_file_size"] = max(
|
388
|
+
stats["largest_file_size"], file_size
|
389
|
+
)
|
368
390
|
except OSError:
|
369
391
|
pass
|
370
|
-
|
392
|
+
|
371
393
|
# Track extensions for trackable files only
|
372
394
|
ext = file_path.suffix.lower()
|
373
|
-
stats[
|
374
|
-
|
395
|
+
stats["file_extensions"][ext] = stats["file_extensions"].get(ext, 0) + 1
|
396
|
+
|
375
397
|
# Total files is just trackable files
|
376
|
-
stats[
|
377
|
-
|
398
|
+
stats["total_files"] = stats["trackable_files"]
|
399
|
+
|
378
400
|
except Exception as e:
|
379
401
|
logger.error(f"Error getting project stats: {e}")
|
380
|
-
|
402
|
+
|
381
403
|
return stats
|