mcp-code-indexer: mcp_code_indexer-4.2.14-py3-none-any.whl → mcp_code_indexer-4.2.16-py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (26)
  1. mcp_code_indexer/database/database.py +251 -85
  2. mcp_code_indexer/database/models.py +66 -24
  3. mcp_code_indexer/database/retry_executor.py +15 -5
  4. mcp_code_indexer/file_scanner.py +107 -12
  5. mcp_code_indexer/main.py +75 -23
  6. mcp_code_indexer/server/mcp_server.py +191 -1
  7. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
  8. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
  9. mcp_code_indexer/vector_mode/config.py +113 -45
  10. mcp_code_indexer/vector_mode/const.py +24 -0
  11. mcp_code_indexer/vector_mode/daemon.py +860 -98
  12. mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
  13. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
  14. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
  15. mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
  16. mcp_code_indexer/vector_mode/services/__init__.py +9 -0
  17. mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
  18. mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
  19. mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
  20. mcp_code_indexer/vector_mode/types.py +46 -0
  21. mcp_code_indexer/vector_mode/utils.py +50 -0
  22. {mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/METADATA +13 -10
  23. {mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/RECORD +26 -19
  24. {mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/WHEEL +1 -1
  25. {mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/entry_points.txt +0 -0
  26. {mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info/licenses}/LICENSE +0 -0
@@ -11,18 +11,23 @@ from typing import List, Dict, Set, Optional, NamedTuple
11
11
  from pathlib import Path
12
12
  from dataclasses import dataclass
13
13
  from datetime import datetime
14
+ from ..utils import should_ignore_path
14
15
 
15
16
  logger = logging.getLogger(__name__)
16
17
 
18
+
17
19
  class ChangeType(str, Enum):
18
20
  """Types of file system changes."""
21
+
19
22
  CREATED = "created"
20
23
  MODIFIED = "modified"
21
24
  DELETED = "deleted"
22
25
  MOVED = "moved"
23
26
 
27
+
24
28
  class FileChange(NamedTuple):
25
29
  """Represents a file system change."""
30
+
26
31
  path: str
27
32
  change_type: ChangeType
28
33
  timestamp: datetime
@@ -30,9 +35,11 @@ class FileChange(NamedTuple):
30
35
  size: Optional[int] = None
31
36
  hash: Optional[str] = None
32
37
 
38
+
33
39
  @dataclass
34
40
  class ChangeStats:
35
41
  """Statistics about detected changes."""
42
+
36
43
  total_changes: int = 0
37
44
  creates: int = 0
38
45
  modifications: int = 0
@@ -41,14 +48,15 @@ class ChangeStats:
41
48
  start_time: Optional[datetime] = None
42
49
  last_change: Optional[datetime] = None
43
50
 
51
+
44
52
  class ChangeDetector:
45
53
  """
46
54
  High-level change detection and classification.
47
-
55
+
48
56
  Processes raw file system events and provides structured change information
49
57
  for the vector indexing pipeline.
50
58
  """
51
-
59
+
52
60
  def __init__(
53
61
  self,
54
62
  project_root: Path,
@@ -57,7 +65,7 @@ class ChangeDetector:
57
65
  ):
58
66
  """
59
67
  Initialize change detector.
60
-
68
+
61
69
  Args:
62
70
  project_root: Root directory to monitor
63
71
  ignore_patterns: Patterns to ignore (glob-style)
@@ -65,54 +73,38 @@ class ChangeDetector:
65
73
  """
66
74
  self.project_root = Path(project_root).resolve()
67
75
  self.ignore_patterns = ignore_patterns or [
68
- "*.log", "*.tmp", "*~", ".git/*", "__pycache__/*",
69
- "node_modules/*", "*.pyc", "*.pyo", ".DS_Store", "Thumbs.db"
76
+ "*.log",
77
+ "*.tmp",
78
+ "*~",
79
+ ".git/*",
80
+ "__pycache__/*",
81
+ "node_modules/*",
82
+ "*.pyc",
83
+ "*.pyo",
84
+ ".DS_Store",
85
+ "Thumbs.db",
70
86
  ]
71
87
  self.debounce_interval = debounce_interval
72
-
88
+
73
89
  # Change tracking
74
90
  self.recent_changes: List[FileChange] = []
75
91
  self.pending_changes: Dict[str, FileChange] = {}
76
92
  self.last_change_time: Dict[str, datetime] = {}
77
-
93
+
78
94
  # Statistics
79
95
  self.stats = ChangeStats(start_time=datetime.utcnow())
80
-
81
- # Compile ignore patterns for performance
82
- import fnmatch
83
- self._compiled_patterns = [
84
- fnmatch.translate(pattern) for pattern in self.ignore_patterns
85
- ]
86
-
87
- def should_ignore_path(self, path: Path) -> bool:
88
- """Check if a path should be ignored based on patterns."""
89
- try:
90
- relative_path = path.relative_to(self.project_root)
91
- path_str = str(relative_path)
92
-
93
- import re
94
- for pattern in self._compiled_patterns:
95
- if re.match(pattern, path_str):
96
- return True
97
-
98
- return False
99
-
100
- except ValueError:
101
- # Path is not relative to project root
102
- return True
103
-
96
+
104
97
  def _should_debounce(self, file_path: str) -> bool:
105
98
  """Check if change should be debounced."""
106
99
  now = datetime.utcnow()
107
-
108
100
  if file_path in self.last_change_time:
109
101
  elapsed = (now - self.last_change_time[file_path]).total_seconds()
110
102
  if elapsed < self.debounce_interval:
111
103
  return True
112
-
104
+
113
105
  self.last_change_time[file_path] = now
114
106
  return False
115
-
107
+
116
108
  def _get_file_info(self, path: Path) -> Dict[str, Optional[int]]:
117
109
  """Get file information (size, etc.)."""
118
110
  try:
@@ -123,35 +115,23 @@ class ChangeDetector:
123
115
  return {"size": None}
124
116
  except (OSError, PermissionError):
125
117
  return {"size": None}
126
-
118
+
127
119
  def _classify_change(
128
- self,
129
- path: Path,
130
- event_type: str,
131
- old_path: Optional[Path] = None
120
+ self, path: Path, event_type: str, old_path: Optional[Path] = None
132
121
  ) -> Optional[FileChange]:
133
122
  """Classify a file system event into a structured change."""
134
-
135
123
  # Convert to relative path
136
124
  try:
137
125
  relative_path = str(path.relative_to(self.project_root))
138
126
  except ValueError:
139
127
  # Path outside project root
140
128
  return None
141
-
129
+
142
130
  # Check if should be ignored
143
- if self.should_ignore_path(path):
131
+ if should_ignore_path(path, self.project_root, self.ignore_patterns):
144
132
  logger.debug(f"Ignoring change to {relative_path} (matches ignore pattern)")
145
133
  return None
146
-
147
- # Check debouncing
148
- if self._should_debounce(relative_path):
149
- logger.debug(f"Debouncing change to {relative_path}")
150
- return None
151
-
152
- # Get file info
153
- file_info = self._get_file_info(path)
154
-
134
+
155
135
  # Map event types to change types
156
136
  if event_type in ["created", "added"]:
157
137
  change_type = ChangeType.CREATED
@@ -164,7 +144,15 @@ class ChangeDetector:
164
144
  else:
165
145
  logger.warning(f"Unknown event type: {event_type}")
166
146
  return None
167
-
147
+
148
+ # Check debouncing
149
+ if self._should_debounce(relative_path):
150
+ logger.debug(f"Debouncing change to {relative_path}")
151
+ return None
152
+
153
+ # Get file info
154
+ file_info = self._get_file_info(path)
155
+
168
156
  # Create change object
169
157
  old_relative_path = None
170
158
  if old_path:
@@ -172,44 +160,40 @@ class ChangeDetector:
172
160
  old_relative_path = str(old_path.relative_to(self.project_root))
173
161
  except ValueError:
174
162
  pass
175
-
163
+
176
164
  change = FileChange(
177
165
  path=relative_path,
178
166
  change_type=change_type,
179
167
  timestamp=datetime.utcnow(),
180
168
  old_path=old_relative_path,
181
169
  size=file_info.get("size"),
182
- hash=None # Will be computed later if needed
170
+ hash=None, # Will be computed later if needed
183
171
  )
184
-
185
172
  return change
186
-
173
+
187
174
  def process_fs_event(
188
- self,
189
- event_type: str,
190
- path: Path,
191
- old_path: Optional[Path] = None
175
+ self, event_type: str, path: Path, old_path: Optional[Path] = None
192
176
  ) -> Optional[FileChange]:
193
177
  """
194
178
  Process a file system event and return structured change.
195
-
179
+
196
180
  Args:
197
181
  event_type: Type of event (created, modified, deleted, moved)
198
182
  path: Path that changed
199
183
  old_path: Old path (for moves)
200
-
184
+
201
185
  Returns:
202
186
  FileChange object or None if ignored
203
187
  """
204
188
  change = self._classify_change(path, event_type, old_path)
205
-
189
+
206
190
  if change:
207
191
  self.recent_changes.append(change)
208
-
192
+
209
193
  # Update statistics
210
194
  self.stats.total_changes += 1
211
195
  self.stats.last_change = change.timestamp
212
-
196
+
213
197
  if change.change_type == ChangeType.CREATED:
214
198
  self.stats.creates += 1
215
199
  elif change.change_type == ChangeType.MODIFIED:
@@ -218,95 +202,127 @@ class ChangeDetector:
218
202
  self.stats.deletions += 1
219
203
  elif change.change_type == ChangeType.MOVED:
220
204
  self.stats.moves += 1
221
-
205
+
222
206
  logger.info(f"Detected change: {change.change_type.value} {change.path}")
223
-
207
+
224
208
  return change
225
-
209
+
226
210
  def get_recent_changes(
227
211
  self,
228
212
  limit: Optional[int] = None,
229
- change_types: Optional[List[ChangeType]] = None
213
+ change_types: Optional[List[ChangeType]] = None,
230
214
  ) -> List[FileChange]:
231
215
  """
232
216
  Get recent changes with optional filtering.
233
-
217
+
234
218
  Args:
235
219
  limit: Maximum number of changes to return
236
220
  change_types: Filter by change types
237
-
221
+
238
222
  Returns:
239
223
  List of recent changes
240
224
  """
241
225
  changes = self.recent_changes
242
-
226
+
243
227
  # Filter by change types
244
228
  if change_types:
245
229
  changes = [c for c in changes if c.change_type in change_types]
246
-
230
+
247
231
  # Sort by timestamp (most recent first)
248
232
  changes = sorted(changes, key=lambda c: c.timestamp, reverse=True)
249
-
233
+
250
234
  # Apply limit
251
235
  if limit:
252
236
  changes = changes[:limit]
253
-
237
+
254
238
  return changes
255
-
239
+
256
240
  def clear_recent_changes(self) -> int:
257
241
  """Clear recent changes and return count cleared."""
258
242
  count = len(self.recent_changes)
259
243
  self.recent_changes.clear()
260
244
  return count
261
-
245
+
262
246
  def get_changes_since(self, since: datetime) -> List[FileChange]:
263
247
  """Get all changes since a specific timestamp."""
264
- return [
265
- change for change in self.recent_changes
266
- if change.timestamp >= since
267
- ]
268
-
248
+ return [change for change in self.recent_changes if change.timestamp >= since]
249
+
269
250
  def get_stats(self) -> ChangeStats:
270
251
  """Get change detection statistics."""
271
252
  return self.stats
272
-
253
+
273
254
  def reset_stats(self) -> None:
274
255
  """Reset change detection statistics."""
275
256
  self.stats = ChangeStats(start_time=datetime.utcnow())
276
-
257
+
277
258
  def get_changed_files(self, since: Optional[datetime] = None) -> Set[str]:
278
259
  """Get set of file paths that have changed."""
279
260
  changes = self.recent_changes
280
-
261
+
281
262
  if since:
282
263
  changes = [c for c in changes if c.timestamp >= since]
283
-
264
+
284
265
  # Collect unique file paths
285
266
  changed_files = set()
286
267
  for change in changes:
287
268
  changed_files.add(change.path)
288
269
  if change.old_path: # For moves
289
270
  changed_files.add(change.old_path)
290
-
271
+
291
272
  return changed_files
292
-
273
+
293
274
  def is_code_file(self, path: str) -> bool:
294
275
  """Check if a file is likely a code file."""
295
276
  code_extensions = {
296
- '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
297
- '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
298
- '.clj', '.cljs', '.hs', '.ml', '.fs', '.ex', '.exs', '.cr',
299
- '.dart', '.lua', '.pl', '.sh', '.bash', '.zsh', '.fish',
300
- '.sql', '.r', '.m', '.mm', '.vim', '.el', '.lisp', '.scm'
277
+ ".py",
278
+ ".js",
279
+ ".ts",
280
+ ".jsx",
281
+ ".tsx",
282
+ ".java",
283
+ ".cpp",
284
+ ".c",
285
+ ".h",
286
+ ".cs",
287
+ ".php",
288
+ ".rb",
289
+ ".go",
290
+ ".rs",
291
+ ".swift",
292
+ ".kt",
293
+ ".scala",
294
+ ".clj",
295
+ ".cljs",
296
+ ".hs",
297
+ ".ml",
298
+ ".fs",
299
+ ".ex",
300
+ ".exs",
301
+ ".cr",
302
+ ".dart",
303
+ ".lua",
304
+ ".pl",
305
+ ".sh",
306
+ ".bash",
307
+ ".zsh",
308
+ ".fish",
309
+ ".sql",
310
+ ".r",
311
+ ".m",
312
+ ".mm",
313
+ ".vim",
314
+ ".el",
315
+ ".lisp",
316
+ ".scm",
301
317
  }
302
-
318
+
303
319
  return Path(path).suffix.lower() in code_extensions
304
-
320
+
305
321
  def get_code_changes(self, since: Optional[datetime] = None) -> List[FileChange]:
306
322
  """Get changes to code files only."""
307
323
  changes = self.get_recent_changes()
308
-
324
+
309
325
  if since:
310
326
  changes = [c for c in changes if c.timestamp >= since]
311
-
327
+
312
328
  return [c for c in changes if self.is_code_file(c.path)]