mcp-code-indexer 4.2.15__py3-none-any.whl → 4.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. mcp_code_indexer/database/database.py +334 -115
  2. mcp_code_indexer/database/database_factory.py +1 -1
  3. mcp_code_indexer/database/exceptions.py +1 -1
  4. mcp_code_indexer/database/models.py +66 -24
  5. mcp_code_indexer/database/retry_executor.py +15 -5
  6. mcp_code_indexer/file_scanner.py +107 -12
  7. mcp_code_indexer/main.py +43 -30
  8. mcp_code_indexer/server/mcp_server.py +201 -7
  9. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
  10. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
  11. mcp_code_indexer/vector_mode/config.py +113 -45
  12. mcp_code_indexer/vector_mode/const.py +24 -0
  13. mcp_code_indexer/vector_mode/daemon.py +860 -98
  14. mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
  15. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
  16. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
  17. mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
  18. mcp_code_indexer/vector_mode/services/__init__.py +9 -0
  19. mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
  20. mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
  21. mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
  22. mcp_code_indexer/vector_mode/types.py +46 -0
  23. mcp_code_indexer/vector_mode/utils.py +50 -0
  24. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/METADATA +13 -10
  25. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/RECORD +28 -21
  26. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/WHEEL +1 -1
  27. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/entry_points.txt +0 -0
  28. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info/licenses}/LICENSE +0 -0
@@ -7,14 +7,17 @@ for the vector mode indexing pipeline.
7
7
 
8
8
  import asyncio
9
9
  import logging
10
+
10
11
  from pathlib import Path
11
12
  from typing import Callable, Optional, List, Dict, Any
12
13
  import time
13
14
  from concurrent.futures import ThreadPoolExecutor
15
+ from abc import ABC, abstractmethod
14
16
 
15
17
  try:
16
- from watchdog.observers import Observer
18
+ from watchdog.observers import Observer, ObserverType
17
19
  from watchdog.events import FileSystemEventHandler, FileSystemEvent
20
+
18
21
  WATCHDOG_AVAILABLE = True
19
22
  except ImportError:
20
23
  WATCHDOG_AVAILABLE = False
@@ -27,9 +30,34 @@ from .merkle_tree import MerkleTree
27
30
 
28
31
  logger = logging.getLogger(__name__)
29
32
 
33
+
34
+ class BaseFileWatcher(ABC):
35
+ """Abstract base class for file watchers."""
36
+
37
+ @abstractmethod
38
+ async def initialize(self) -> None:
39
+ """Initialize the file watcher."""
40
+ pass
41
+
42
+ @abstractmethod
43
+ def add_change_callback(self, callback: Callable[[FileChange], None]) -> None:
44
+ """Add a callback to be called when files change."""
45
+ pass
46
+
47
+ @abstractmethod
48
+ def start_watching(self) -> None:
49
+ """Start watching for file changes."""
50
+ pass
51
+
52
+ @abstractmethod
53
+ def get_stats(self) -> Dict[str, Any]:
54
+ """Get watcher statistics."""
55
+ pass
56
+
57
+
30
58
  class VectorModeEventHandler(FileSystemEventHandler):
31
59
  """Event handler for file system changes in vector mode."""
32
-
60
+
33
61
  def __init__(
34
62
  self,
35
63
  change_detector: ChangeDetector,
@@ -38,7 +66,7 @@ class VectorModeEventHandler(FileSystemEventHandler):
38
66
  ):
39
67
  """
40
68
  Initialize event handler.
41
-
69
+
42
70
  Args:
43
71
  change_detector: Change detection processor
44
72
  merkle_tree: Optional Merkle tree for efficient change tracking
@@ -48,85 +76,94 @@ class VectorModeEventHandler(FileSystemEventHandler):
48
76
  self.change_detector = change_detector
49
77
  self.merkle_tree = merkle_tree
50
78
  self.callback = callback
51
-
79
+
52
80
  # Debouncing state
53
81
  self.pending_events: Dict[str, FileSystemEvent] = {}
54
82
  self.debounce_tasks: Dict[str, asyncio.Task] = {}
55
-
83
+
56
84
  def on_any_event(self, event: FileSystemEvent) -> None:
57
85
  """Handle any file system event."""
58
- if event.is_directory:
59
- return # Skip directory events for now
60
-
61
86
  try:
62
- asyncio.create_task(self._handle_event_async(event))
63
- except RuntimeError:
64
- # No event loop running, handle synchronously
65
- self._handle_event_sync(event)
66
-
87
+ if event.is_directory:
88
+ return # Skip directory events for now
89
+
90
+ try:
91
+ asyncio.create_task(self._handle_event_async(event))
92
+ except RuntimeError:
93
+ # No event loop running, handle synchronously
94
+ self._handle_event_sync(event)
95
+
96
+ except Exception as e:
97
+ # Critical: Catch ALL exceptions to prevent observer thread crash
98
+ logger.error(
99
+ f"Unhandled exception in event handler for {event.src_path}: {e}",
100
+ exc_info=True,
101
+ )
102
+
67
103
  def _handle_event_sync(self, event: FileSystemEvent) -> None:
68
104
  """Handle event synchronously."""
69
105
  path = Path(event.src_path)
70
-
106
+
71
107
  # Process the change
72
108
  change = self.change_detector.process_fs_event(
73
109
  event_type=event.event_type,
74
110
  path=path,
75
- old_path=Path(event.dest_path) if hasattr(event, 'dest_path') else None
111
+ old_path=Path(event.dest_path) if hasattr(event, "dest_path") else None,
76
112
  )
77
-
78
113
  if change:
79
114
  # Update Merkle tree if available
80
115
  if self.merkle_tree:
81
116
  try:
82
117
  self.merkle_tree.update_file(change.path)
83
118
  except Exception as e:
84
- logger.warning(f"Failed to update Merkle tree for {change.path}: {e}")
85
-
119
+ logger.warning(
120
+ f"Failed to update Merkle tree for {change.path}: {e}"
121
+ )
86
122
  # Call callback if provided
87
123
  if self.callback:
88
124
  try:
89
125
  self.callback(change)
90
126
  except Exception as e:
91
127
  logger.error(f"Callback failed for change {change.path}: {e}")
92
-
128
+
93
129
  async def _handle_event_async(self, event: FileSystemEvent) -> None:
94
130
  """Handle event asynchronously with debouncing."""
95
131
  file_path = event.src_path
96
-
132
+
97
133
  # Cancel existing debounce task for this file
98
134
  if file_path in self.debounce_tasks:
99
135
  self.debounce_tasks[file_path].cancel()
100
-
136
+
101
137
  # Store pending event
102
138
  self.pending_events[file_path] = event
103
-
139
+
104
140
  # Create new debounce task
105
141
  self.debounce_tasks[file_path] = asyncio.create_task(
106
142
  self._process_after_debounce(file_path)
107
143
  )
108
-
144
+
109
145
  async def _process_after_debounce(self, file_path: str) -> None:
110
146
  """Process event after debounce delay."""
111
147
  # Wait for debounce interval
112
148
  await asyncio.sleep(0.1) # 100ms debounce
113
-
149
+
114
150
  # Get pending event
115
151
  event = self.pending_events.pop(file_path, None)
116
152
  if event:
117
153
  self._handle_event_sync(event)
118
-
154
+
119
155
  # Clean up task reference
120
156
  self.debounce_tasks.pop(file_path, None)
121
157
 
122
- class FileWatcher:
158
+
159
+ class FileWatcher(BaseFileWatcher):
123
160
  """
124
161
  Real-time file system watcher for vector mode.
125
-
162
+
126
163
  Monitors file changes and integrates with change detection and Merkle tree
127
164
  systems for efficient vector index updates.
128
165
  """
129
-
166
+
130
167
  def __init__(
131
168
  self,
132
169
  project_root: Path,
@@ -137,7 +174,7 @@ class FileWatcher:
137
174
  ):
138
175
  """
139
176
  Initialize file watcher.
140
-
177
+
141
178
  Args:
142
179
  project_root: Root directory to watch
143
180
  project_id: Project identifier
@@ -147,43 +184,45 @@ class FileWatcher:
147
184
  """
148
185
  if not WATCHDOG_AVAILABLE:
149
186
  raise ImportError("watchdog library is required for file monitoring")
150
-
187
+
151
188
  self.project_root = Path(project_root).resolve()
152
189
  self.project_id = project_id
153
190
  self.ignore_patterns = ignore_patterns
154
191
  self.debounce_interval = debounce_interval
155
-
192
+
156
193
  # Initialize components
157
194
  self.change_detector = ChangeDetector(
158
195
  project_root=self.project_root,
159
196
  ignore_patterns=ignore_patterns,
160
197
  debounce_interval=debounce_interval,
161
198
  )
162
-
199
+
163
200
  self.merkle_tree: Optional[MerkleTree] = None
164
201
  if enable_merkle_tree:
165
202
  self.merkle_tree = MerkleTree(self.project_root, project_id)
166
-
203
+
167
204
  # Watchdog components
168
205
  self.observer: Optional[Observer] = None
169
206
  self.event_handler: Optional[VectorModeEventHandler] = None
170
-
207
+
171
208
  # State
172
209
  self.is_watching = False
173
210
  self.change_callbacks: List[Callable[[FileChange], None]] = []
174
-
211
+
175
212
  # Thread pool for intensive operations
176
- self.executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="file_watcher")
177
-
213
+ self.executor = ThreadPoolExecutor(
214
+ max_workers=2, thread_name_prefix="file_watcher"
215
+ )
216
+
178
217
  def add_change_callback(self, callback: Callable[[FileChange], None]) -> None:
179
218
  """Add a callback to be called when files change."""
180
219
  self.change_callbacks.append(callback)
181
-
220
+
182
221
  def remove_change_callback(self, callback: Callable[[FileChange], None]) -> None:
183
222
  """Remove a change callback."""
184
223
  if callback in self.change_callbacks:
185
224
  self.change_callbacks.remove(callback)
186
-
225
+
187
226
  def _on_change(self, change: FileChange) -> None:
188
227
  """Handle a file change by notifying all callbacks."""
189
228
  for callback in self.change_callbacks:
@@ -191,125 +230,136 @@ class FileWatcher:
191
230
  callback(change)
192
231
  except Exception as e:
193
232
  logger.error(f"Change callback failed: {e}")
194
-
233
+
195
234
  async def initialize(self) -> None:
196
235
  """Initialize the file watcher (build Merkle tree, etc.)."""
197
236
  logger.info(f"Initializing file watcher for {self.project_root}")
198
-
237
+
199
238
  # Build Merkle tree in thread pool to avoid blocking
200
239
  if self.merkle_tree:
201
240
  loop = asyncio.get_event_loop()
202
241
  await loop.run_in_executor(
203
- self.executor,
204
- self.merkle_tree.build_tree,
205
- self.ignore_patterns
242
+ self.executor, self.merkle_tree.build_tree, self.ignore_patterns
206
243
  )
207
-
208
244
  logger.info("Merkle tree built successfully")
209
-
245
+
210
246
  def start_watching(self) -> None:
211
247
  """Start watching for file changes."""
212
248
  if self.is_watching:
213
249
  logger.warning("File watcher is already running")
214
250
  return
215
-
251
+
216
252
  if not WATCHDOG_AVAILABLE:
217
253
  logger.error("Cannot start file watching: watchdog not available")
218
254
  return
219
-
255
+
220
256
  logger.info(f"Starting file watcher for {self.project_root}")
221
-
257
+
222
258
  # Create event handler
223
259
  self.event_handler = VectorModeEventHandler(
224
260
  change_detector=self.change_detector,
225
261
  merkle_tree=self.merkle_tree,
226
262
  callback=self._on_change,
227
263
  )
228
-
264
+
229
265
  # Create and start observer
230
- self.observer = Observer()
231
- self.observer.schedule(
232
- self.event_handler,
233
- str(self.project_root),
234
- recursive=True
235
- )
236
- self.observer.start()
237
-
266
+ try:
267
+ self.observer = Observer()
268
+ self.observer.schedule(
269
+ self.event_handler, str(self.project_root), recursive=True
270
+ )
271
+ self.observer.start()
272
+ except Exception as e:
273
+ logger.error(f"Failed to start file observer: {e}", exc_info=True)
274
+ # Clean up on failure
275
+ if self.observer:
276
+ try:
277
+ self.observer.stop()
278
+ self.observer = None
279
+ except Exception:
280
+ pass
281
+ self.event_handler = None
282
+ self.is_watching = False
283
+ raise
284
+
238
285
  self.is_watching = True
239
286
  logger.info("File watcher started successfully")
240
-
287
+
241
288
  def stop_watching(self) -> None:
242
289
  """Stop watching for file changes."""
243
290
  if not self.is_watching:
244
291
  return
245
-
292
+
246
293
  logger.info("Stopping file watcher")
247
-
294
+
248
295
  if self.observer:
249
- self.observer.stop()
250
- self.observer.join()
251
- self.observer = None
252
-
296
+ try:
297
+ self.observer.stop()
298
+ self.observer.join()
299
+ except Exception as e:
300
+ logger.error(f"Error stopping file observer: {e}", exc_info=True)
301
+ finally:
302
+ self.observer = None
303
+
253
304
  self.event_handler = None
254
305
  self.is_watching = False
255
-
306
+
256
307
  logger.info("File watcher stopped")
257
-
308
+
258
309
  def get_recent_changes(
259
310
  self,
260
311
  limit: Optional[int] = None,
261
- change_types: Optional[List[ChangeType]] = None
312
+ change_types: Optional[List[ChangeType]] = None,
262
313
  ) -> List[FileChange]:
263
314
  """Get recent file changes."""
264
315
  return self.change_detector.get_recent_changes(limit, change_types)
265
-
316
+
266
317
  def get_changed_files(self, since: Optional[str] = None) -> List[str]:
267
318
  """Get list of files that have changed."""
268
319
  from datetime import datetime
269
-
320
+
270
321
  since_dt = None
271
322
  if since:
272
323
  try:
273
324
  since_dt = datetime.fromisoformat(since)
274
325
  except ValueError:
275
326
  logger.warning(f"Invalid timestamp format: {since}")
276
-
327
+
277
328
  # Get changes from detector
278
329
  changed_files = list(self.change_detector.get_changed_files(since_dt))
279
-
330
+
280
331
  # Add changes from Merkle tree if available
281
332
  if self.merkle_tree:
282
333
  merkle_changes = self.merkle_tree.get_changed_files(since_dt)
283
334
  changed_files.extend(merkle_changes)
284
-
335
+
285
336
  return list(set(changed_files)) # Remove duplicates
286
-
337
+
287
338
  def force_scan(self) -> int:
288
339
  """Force a full scan and return number of changes detected."""
289
340
  logger.info("Forcing full file system scan")
290
-
341
+
291
342
  if self.merkle_tree:
292
343
  # Rebuild Merkle tree
293
344
  self.merkle_tree.build_tree(self.ignore_patterns)
294
-
345
+
295
346
  # Get changed files
296
347
  changed_files = self.merkle_tree.get_changed_files()
297
-
348
+
298
349
  # Process changes through detector
299
350
  for file_path in changed_files:
300
351
  full_path = self.project_root / file_path
301
352
  change = self.change_detector.process_fs_event(
302
- event_type="modified",
303
- path=full_path
353
+ event_type="modified", path=full_path
304
354
  )
305
-
355
+
306
356
  if change and self.change_callbacks:
307
357
  self._on_change(change)
308
-
358
+
309
359
  return len(changed_files)
310
-
360
+
311
361
  return 0
312
-
362
+
313
363
  def get_stats(self) -> Dict[str, Any]:
314
364
  """Get watcher statistics."""
315
365
  stats = {
@@ -319,127 +369,131 @@ class FileWatcher:
319
369
  "change_detector_stats": self.change_detector.get_stats().__dict__,
320
370
  "callbacks_registered": len(self.change_callbacks),
321
371
  }
322
-
372
+
323
373
  if self.merkle_tree:
324
374
  stats["merkle_tree"] = self.merkle_tree.get_tree_summary()
325
-
375
+
326
376
  return stats
327
-
377
+
328
378
  def cleanup(self) -> None:
329
379
  """Clean up resources."""
330
380
  self.stop_watching()
331
-
381
+
332
382
  if self.executor:
333
383
  self.executor.shutdown(wait=True)
334
-
384
+
335
385
  async def __aenter__(self):
336
386
  await self.initialize()
337
387
  return self
338
-
388
+
339
389
  async def __aexit__(self, exc_type, exc_val, exc_tb):
340
390
  self.cleanup()
341
391
 
392
+
342
393
  # Fallback implementation for when watchdog is not available
343
- class PollingFileWatcher:
394
+ class PollingFileWatcher(BaseFileWatcher):
344
395
  """
345
396
  Fallback file watcher using polling instead of OS events.
346
-
397
+
347
398
  Used when watchdog is not available or on systems that don't support
348
399
  efficient file system monitoring.
349
400
  """
350
-
401
+
351
402
  def __init__(
352
- self,
353
- project_root: Path,
354
- project_id: str,
355
- poll_interval: float = 5.0,
356
- **kwargs
403
+ self, project_root: Path, project_id: str, poll_interval: float = 5.0, **kwargs
357
404
  ):
358
405
  """Initialize polling file watcher."""
359
406
  self.project_root = Path(project_root).resolve()
360
407
  self.project_id = project_id
361
408
  self.poll_interval = poll_interval
362
-
409
+
363
410
  self.change_detector = ChangeDetector(project_root=self.project_root, **kwargs)
364
411
  self.merkle_tree = MerkleTree(self.project_root, project_id)
365
-
412
+
366
413
  self.is_watching = False
367
414
  self.poll_task: Optional[asyncio.Task] = None
368
415
  self.change_callbacks: List[Callable[[FileChange], None]] = []
369
-
416
+
370
417
  def add_change_callback(self, callback: Callable[[FileChange], None]) -> None:
371
418
  """Add a callback to be called when files change."""
372
419
  self.change_callbacks.append(callback)
373
-
420
+
374
421
  async def initialize(self) -> None:
375
422
  """Initialize the polling watcher."""
376
423
  self.merkle_tree.build_tree()
377
-
424
+
378
425
  def start_watching(self) -> None:
379
426
  """Start polling for changes."""
380
427
  if self.is_watching:
381
428
  return
382
-
429
+
383
430
  self.is_watching = True
384
431
  self.poll_task = asyncio.create_task(self._poll_loop())
385
-
432
+
386
433
  def stop_watching(self) -> None:
387
434
  """Stop polling for changes."""
388
435
  self.is_watching = False
389
436
  if self.poll_task:
390
437
  self.poll_task.cancel()
391
-
438
+
392
439
  async def _poll_loop(self) -> None:
393
440
  """Main polling loop."""
394
441
  while self.is_watching:
395
442
  try:
396
443
  # Force scan for changes
397
444
  changed_files = self.merkle_tree.get_changed_files()
398
-
445
+
399
446
  for file_path in changed_files:
400
447
  full_path = self.project_root / file_path
401
448
  change = self.change_detector.process_fs_event(
402
- event_type="modified",
403
- path=full_path
449
+ event_type="modified", path=full_path
404
450
  )
405
-
451
+
406
452
  if change:
407
453
  for callback in self.change_callbacks:
408
454
  callback(change)
409
-
455
+
410
456
  await asyncio.sleep(self.poll_interval)
411
-
457
+
412
458
  except asyncio.CancelledError:
413
459
  break
414
460
  except Exception as e:
415
461
  logger.error(f"Error in polling loop: {e}")
416
462
  await asyncio.sleep(self.poll_interval)
417
-
463
+
464
+ def get_stats(self) -> Dict[str, Any]:
465
+ """Get watcher statistics."""
466
+ return {
467
+ "project_root": str(self.project_root),
468
+ "project_id": self.project_id,
469
+ "is_watching": self.is_watching,
470
+ }
471
+
418
472
  def cleanup(self) -> None:
419
473
  """Clean up resources."""
420
474
  self.stop_watching()
421
475
 
476
+
422
477
  def create_file_watcher(
423
- project_root: Path,
424
- project_id: str,
425
- use_polling: bool = False,
426
- **kwargs
427
- ) -> Any:
478
+ project_root: Path, project_id: str, use_polling: bool = False, **kwargs
479
+ ) -> BaseFileWatcher:
428
480
  """
429
481
  Create appropriate file watcher based on availability.
430
-
482
+
431
483
  Args:
432
484
  project_root: Root directory to watch
433
485
  project_id: Project identifier
434
486
  use_polling: Force use of polling watcher
435
487
  **kwargs: Additional arguments for watcher
436
-
488
+
437
489
  Returns:
438
490
  FileWatcher or PollingFileWatcher instance
439
491
  """
440
492
  if use_polling or not WATCHDOG_AVAILABLE:
441
493
  logger.info("Using polling file watcher")
442
- return PollingFileWatcher(project_root, project_id, **kwargs)
494
+ watcher = PollingFileWatcher(project_root, project_id, **kwargs)
443
495
  else:
444
496
  logger.info("Using real-time file watcher")
445
- return FileWatcher(project_root, project_id, **kwargs)
497
+ watcher = FileWatcher(project_root, project_id, **kwargs)
498
+
499
+ return watcher