mcp-code-indexer 3.1.4__py3-none-any.whl → 3.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,131 +22,151 @@ logger = logging.getLogger(__name__)
22
22
  # Default patterns to ignore even without .gitignore
23
23
  DEFAULT_IGNORE_PATTERNS = [
24
24
  # Version control
25
- '.git/',
26
- '.svn/',
27
- '.hg/',
28
-
25
+ ".git/",
26
+ ".svn/",
27
+ ".hg/",
29
28
  # Dependencies and packages
30
- 'node_modules/',
31
- 'venv/',
32
- '.venv/',
33
- 'env/',
34
- '.env/',
35
- '__pycache__/',
36
- '*.pyc',
37
- '*.pyo',
38
- '*.pyd',
39
- '.Python',
40
-
29
+ "node_modules/",
30
+ "venv/",
31
+ ".venv/",
32
+ "env/",
33
+ ".env/",
34
+ "__pycache__/",
35
+ "*.pyc",
36
+ "*.pyo",
37
+ "*.pyd",
38
+ ".Python",
41
39
  # Build artifacts
42
- 'build/',
43
- 'dist/',
44
- 'target/',
45
- 'out/',
46
- 'bin/',
47
- 'obj/',
48
- '*.o',
49
- '*.so',
50
- '*.dylib',
51
- '*.dll',
52
- '*.exe',
53
-
40
+ "build/",
41
+ "dist/",
42
+ "target/",
43
+ "out/",
44
+ "bin/",
45
+ "obj/",
46
+ "*.o",
47
+ "*.so",
48
+ "*.dylib",
49
+ "*.dll",
50
+ "*.exe",
54
51
  # IDE and editor files
55
- '.vscode/',
56
- '.idea/',
57
- '.vs/',
58
- '*.swp',
59
- '*.swo',
60
- '*~',
61
- '.DS_Store',
62
- 'Thumbs.db',
63
-
52
+ ".vscode/",
53
+ ".idea/",
54
+ ".vs/",
55
+ "*.swp",
56
+ "*.swo",
57
+ "*~",
58
+ ".DS_Store",
59
+ "Thumbs.db",
64
60
  # Testing and coverage
65
- 'coverage/',
66
- 'htmlcov/',
67
- '.pytest_cache/',
68
- '.coverage',
69
- '*.coverage',
70
-
61
+ "coverage/",
62
+ "htmlcov/",
63
+ ".pytest_cache/",
64
+ ".coverage",
65
+ "*.coverage",
71
66
  # Documentation builds
72
- '_build/',
73
- 'docs/_build/',
74
- 'site/',
75
-
67
+ "_build/",
68
+ "docs/_build/",
69
+ "site/",
76
70
  # Logs and temporary files
77
- '*.log',
78
- '*.tmp',
79
- '*.temp',
80
- '*.cache',
81
-
71
+ "*.log",
72
+ "*.tmp",
73
+ "*.temp",
74
+ "*.cache",
82
75
  # Package files
83
- '*.tar.gz',
84
- '*.zip',
85
- '*.rar',
86
- '*.7z',
87
-
76
+ "*.tar.gz",
77
+ "*.zip",
78
+ "*.rar",
79
+ "*.7z",
88
80
  # Lock files
89
- 'package-lock.json',
90
- 'yarn.lock',
91
- 'Pipfile.lock',
92
- 'poetry.lock',
81
+ "package-lock.json",
82
+ "yarn.lock",
83
+ "Pipfile.lock",
84
+ "poetry.lock",
93
85
  ]
94
86
 
95
87
  # File extensions commonly ignored for code indexing
96
88
  IGNORED_EXTENSIONS = {
97
89
  # Binary files
98
- '.exe', '.dll', '.so', '.dylib', '.bin', '.o', '.obj',
99
-
90
+ ".exe",
91
+ ".dll",
92
+ ".so",
93
+ ".dylib",
94
+ ".bin",
95
+ ".o",
96
+ ".obj",
100
97
  # Images
101
- '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.svg',
102
-
98
+ ".png",
99
+ ".jpg",
100
+ ".jpeg",
101
+ ".gif",
102
+ ".bmp",
103
+ ".ico",
104
+ ".svg",
103
105
  # Documents
104
- '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
105
-
106
+ ".pdf",
107
+ ".doc",
108
+ ".docx",
109
+ ".xls",
110
+ ".xlsx",
111
+ ".ppt",
112
+ ".pptx",
106
113
  # Media
107
- '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv',
108
-
114
+ ".mp3",
115
+ ".mp4",
116
+ ".avi",
117
+ ".mov",
118
+ ".wmv",
119
+ ".flv",
109
120
  # Archives
110
- '.zip', '.tar', '.gz', '.rar', '.7z',
111
-
121
+ ".zip",
122
+ ".tar",
123
+ ".gz",
124
+ ".rar",
125
+ ".7z",
112
126
  # Fonts
113
- '.ttf', '.otf', '.woff', '.woff2', '.eot',
127
+ ".ttf",
128
+ ".otf",
129
+ ".woff",
130
+ ".woff2",
131
+ ".eot",
114
132
  }
115
133
 
116
134
 
117
135
  class FileScanner:
118
136
  """
119
137
  Handles file discovery with gitignore and pattern-based filtering.
120
-
138
+
121
139
  Provides methods to scan directories while respecting .gitignore files
122
140
  and default ignore patterns to identify files suitable for description tracking.
123
141
  """
124
-
142
+
125
143
  def __init__(self, project_root: Path):
126
144
  """
127
145
  Initialize file scanner for a project.
128
-
146
+
129
147
  Args:
130
148
  project_root: Root directory of the project to scan
131
149
  """
132
150
  self.project_root = Path(project_root).resolve()
133
151
  self._gitignore_cache: dict = {}
134
152
  self._load_gitignore_patterns()
135
-
153
+
136
154
  def _load_gitignore_patterns(self) -> None:
137
155
  """Load and cache gitignore patterns from the project."""
138
156
  self._gitignore_cache.clear()
139
-
157
+
140
158
  if parse_gitignore is None:
141
- logger.warning("gitignore_parser not available, using default patterns only")
159
+ logger.warning(
160
+ "gitignore_parser not available, using default patterns only"
161
+ )
142
162
  return
143
-
163
+
144
164
  # Look for .gitignore files in the project hierarchy
145
165
  current_path = self.project_root
146
-
166
+
147
167
  while current_path != current_path.parent:
148
- gitignore_path = current_path / '.gitignore'
149
-
168
+ gitignore_path = current_path / ".gitignore"
169
+
150
170
  if gitignore_path.exists():
151
171
  try:
152
172
  gitignore_func = parse_gitignore(gitignore_path)
@@ -154,14 +174,14 @@ class FileScanner:
154
174
  logger.debug(f"Loaded .gitignore from {gitignore_path}")
155
175
  except Exception as e:
156
176
  logger.warning(f"Failed to parse {gitignore_path}: {e}")
157
-
177
+
158
178
  current_path = current_path.parent
159
-
179
+
160
180
  def _is_ignored_by_gitignore(self, file_path: Path) -> bool:
161
181
  """Check if a file is ignored by any .gitignore file."""
162
182
  if not self._gitignore_cache:
163
183
  return False
164
-
184
+
165
185
  # Check against all loaded .gitignore patterns
166
186
  for base_path, gitignore_func in self._gitignore_cache.items():
167
187
  try:
@@ -171,9 +191,9 @@ class FileScanner:
171
191
  except Exception as e:
172
192
  logger.debug(f"Error checking gitignore pattern: {e}")
173
193
  continue
174
-
194
+
175
195
  return False
176
-
196
+
177
197
  def _is_ignored_by_default_patterns(self, file_path: Path) -> bool:
178
198
  """Check if a file matches default ignore patterns."""
179
199
  try:
@@ -183,11 +203,11 @@ class FileScanner:
183
203
  rel_path_str = str(rel_path)
184
204
  except ValueError:
185
205
  return True
186
-
206
+
187
207
  for pattern in DEFAULT_IGNORE_PATTERNS:
188
208
  # Handle directory patterns
189
- if pattern.endswith('/'):
190
- pattern_no_slash = pattern.rstrip('/')
209
+ if pattern.endswith("/"):
210
+ pattern_no_slash = pattern.rstrip("/")
191
211
  # Check if any parent directory matches
192
212
  for parent in rel_path.parents:
193
213
  if fnmatch.fnmatch(parent.name, pattern_no_slash):
@@ -201,89 +221,89 @@ class FileScanner:
201
221
  return True
202
222
  if fnmatch.fnmatch(file_path.name, pattern):
203
223
  return True
204
-
224
+
205
225
  return False
206
-
226
+
207
227
  def _is_ignored_by_extension(self, file_path: Path) -> bool:
208
228
  """Check if a file has an ignored extension."""
209
229
  return file_path.suffix.lower() in IGNORED_EXTENSIONS
210
-
230
+
211
231
  def should_ignore_file(self, file_path: Path) -> bool:
212
232
  """
213
233
  Determine if a file should be ignored.
214
-
234
+
215
235
  Args:
216
236
  file_path: Path to the file to check
217
-
237
+
218
238
  Returns:
219
239
  True if the file should be ignored
220
240
  """
221
241
  # Check if it's a file (not directory)
222
242
  if not file_path.is_file():
223
243
  return True
224
-
244
+
225
245
  # Check file extension
226
246
  if self._is_ignored_by_extension(file_path):
227
247
  return True
228
-
248
+
229
249
  # Check default patterns
230
250
  if self._is_ignored_by_default_patterns(file_path):
231
251
  return True
232
-
252
+
233
253
  # Check gitignore patterns
234
254
  if self._is_ignored_by_gitignore(file_path):
235
255
  return True
236
-
256
+
237
257
  return False
238
-
258
+
239
259
  def scan_directory(self, max_files: Optional[int] = None) -> List[Path]:
240
260
  """
241
261
  Scan the project directory for trackable files.
242
-
262
+
243
263
  Args:
244
264
  max_files: Maximum number of files to return (None for no limit)
245
-
265
+
246
266
  Returns:
247
267
  List of file paths that should be tracked
248
268
  """
249
269
  files = []
250
-
270
+
251
271
  try:
252
272
  for file_path in self._walk_directory():
253
273
  if not self.should_ignore_file(file_path):
254
274
  files.append(file_path)
255
-
275
+
256
276
  if max_files and len(files) >= max_files:
257
277
  logger.info(f"Reached max_files limit of {max_files}")
258
278
  break
259
-
279
+
260
280
  except Exception as e:
261
281
  logger.error(f"Error scanning directory {self.project_root}: {e}")
262
-
282
+
263
283
  # Sort files for consistent ordering
264
284
  files.sort()
265
-
285
+
266
286
  logger.info(f"Found {len(files)} trackable files in {self.project_root}")
267
287
  return files
268
-
288
+
269
289
  def _walk_directory(self) -> Generator[Path, None, None]:
270
290
  """Walk through all files in the project directory."""
271
291
  try:
272
- for item in self.project_root.rglob('*'):
292
+ for item in self.project_root.rglob("*"):
273
293
  if item.is_file():
274
294
  yield item
275
295
  except PermissionError as e:
276
296
  logger.warning(f"Permission denied accessing {e.filename}")
277
297
  except Exception as e:
278
298
  logger.error(f"Error walking directory: {e}")
279
-
299
+
280
300
  def get_relative_path(self, file_path: Path) -> str:
281
301
  """
282
302
  Get relative path from project root.
283
-
303
+
284
304
  Args:
285
305
  file_path: Absolute path to file
286
-
306
+
287
307
  Returns:
288
308
  Relative path string from project root
289
309
  """
@@ -295,87 +315,89 @@ class FileScanner:
295
315
  except ValueError:
296
316
  # File is outside project root, return absolute path
297
317
  return str(file_path)
298
-
318
+
299
319
  def find_missing_files(self, existing_paths: Set[str]) -> List[Path]:
300
320
  """
301
321
  Find files that exist on disk but aren't in the existing paths set.
302
-
322
+
303
323
  Args:
304
324
  existing_paths: Set of relative file paths that already have descriptions
305
-
325
+
306
326
  Returns:
307
327
  List of file paths that are missing descriptions
308
328
  """
309
329
  all_files = self.scan_directory()
310
330
  missing_files = []
311
-
331
+
312
332
  for file_path in all_files:
313
333
  rel_path = self.get_relative_path(file_path)
314
334
  if rel_path not in existing_paths:
315
335
  missing_files.append(file_path)
316
-
336
+
317
337
  logger.info(f"Found {len(missing_files)} files missing descriptions")
318
338
  return missing_files
319
-
339
+
320
340
  def is_valid_project_directory(self) -> bool:
321
341
  """
322
342
  Check if the project root is a valid directory for scanning.
323
-
343
+
324
344
  Returns:
325
345
  True if the directory exists and is accessible
326
346
  """
327
347
  try:
328
348
  return (
329
- self.project_root.exists() and
330
- self.project_root.is_dir() and
331
- self.project_root.stat().st_mode & 0o444 # Readable
349
+ self.project_root.exists()
350
+ and self.project_root.is_dir()
351
+ and self.project_root.stat().st_mode & 0o444 # Readable
332
352
  )
333
353
  except (OSError, PermissionError):
334
354
  return False
335
-
355
+
336
356
  def get_project_stats(self) -> dict:
337
357
  """
338
358
  Get statistics about the project directory.
339
-
359
+
340
360
  Returns:
341
361
  Dictionary with project statistics for trackable files only
342
362
  """
343
363
  stats = {
344
- 'total_files': 0,
345
- 'trackable_files': 0,
346
- 'ignored_files': 0,
347
- 'largest_file_size': 0,
348
- 'file_extensions': {},
364
+ "total_files": 0,
365
+ "trackable_files": 0,
366
+ "ignored_files": 0,
367
+ "largest_file_size": 0,
368
+ "file_extensions": {},
349
369
  }
350
-
370
+
351
371
  try:
352
372
  all_files_count = 0
353
373
  for file_path in self._walk_directory():
354
374
  all_files_count += 1
355
-
375
+
356
376
  # Check if trackable first
357
377
  if self.should_ignore_file(file_path):
358
- stats['ignored_files'] += 1
378
+ stats["ignored_files"] += 1
359
379
  continue
360
-
380
+
361
381
  # Only process trackable files for detailed stats
362
- stats['trackable_files'] += 1
363
-
382
+ stats["trackable_files"] += 1
383
+
364
384
  # Track file size
365
385
  try:
366
386
  file_size = file_path.stat().st_size
367
- stats['largest_file_size'] = max(stats['largest_file_size'], file_size)
387
+ stats["largest_file_size"] = max(
388
+ stats["largest_file_size"], file_size
389
+ )
368
390
  except OSError:
369
391
  pass
370
-
392
+
371
393
  # Track extensions for trackable files only
372
394
  ext = file_path.suffix.lower()
373
- stats['file_extensions'][ext] = stats['file_extensions'].get(ext, 0) + 1
374
-
395
+ stats["file_extensions"][ext] = stats["file_extensions"].get(ext, 0) + 1
396
+
375
397
  # Total files is just trackable files
376
- stats['total_files'] = stats['trackable_files']
377
-
398
+ stats["total_files"] = stats["trackable_files"]
399
+
378
400
  except Exception as e:
379
401
  logger.error(f"Error getting project stats: {e}")
380
-
402
+
381
403
  return stats