mcp-code-indexer 4.2.15__py3-none-any.whl → 4.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/database/database.py +251 -85
- mcp_code_indexer/database/models.py +66 -24
- mcp_code_indexer/database/retry_executor.py +15 -5
- mcp_code_indexer/file_scanner.py +107 -12
- mcp_code_indexer/main.py +43 -30
- mcp_code_indexer/server/mcp_server.py +191 -1
- mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
- mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
- mcp_code_indexer/vector_mode/config.py +113 -45
- mcp_code_indexer/vector_mode/const.py +24 -0
- mcp_code_indexer/vector_mode/daemon.py +860 -98
- mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
- mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
- mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
- mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
- mcp_code_indexer/vector_mode/services/__init__.py +9 -0
- mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
- mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
- mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
- mcp_code_indexer/vector_mode/types.py +46 -0
- mcp_code_indexer/vector_mode/utils.py +50 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/METADATA +13 -10
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/RECORD +26 -19
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/WHEEL +1 -1
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info/licenses}/LICENSE +0 -0
|
@@ -11,18 +11,23 @@ from typing import List, Dict, Set, Optional, NamedTuple
|
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from dataclasses import dataclass
|
|
13
13
|
from datetime import datetime
|
|
14
|
+
from ..utils import should_ignore_path
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
16
17
|
|
|
18
|
+
|
|
17
19
|
class ChangeType(str, Enum):
|
|
18
20
|
"""Types of file system changes."""
|
|
21
|
+
|
|
19
22
|
CREATED = "created"
|
|
20
23
|
MODIFIED = "modified"
|
|
21
24
|
DELETED = "deleted"
|
|
22
25
|
MOVED = "moved"
|
|
23
26
|
|
|
27
|
+
|
|
24
28
|
class FileChange(NamedTuple):
|
|
25
29
|
"""Represents a file system change."""
|
|
30
|
+
|
|
26
31
|
path: str
|
|
27
32
|
change_type: ChangeType
|
|
28
33
|
timestamp: datetime
|
|
@@ -30,9 +35,11 @@ class FileChange(NamedTuple):
|
|
|
30
35
|
size: Optional[int] = None
|
|
31
36
|
hash: Optional[str] = None
|
|
32
37
|
|
|
38
|
+
|
|
33
39
|
@dataclass
|
|
34
40
|
class ChangeStats:
|
|
35
41
|
"""Statistics about detected changes."""
|
|
42
|
+
|
|
36
43
|
total_changes: int = 0
|
|
37
44
|
creates: int = 0
|
|
38
45
|
modifications: int = 0
|
|
@@ -41,14 +48,15 @@ class ChangeStats:
|
|
|
41
48
|
start_time: Optional[datetime] = None
|
|
42
49
|
last_change: Optional[datetime] = None
|
|
43
50
|
|
|
51
|
+
|
|
44
52
|
class ChangeDetector:
|
|
45
53
|
"""
|
|
46
54
|
High-level change detection and classification.
|
|
47
|
-
|
|
55
|
+
|
|
48
56
|
Processes raw file system events and provides structured change information
|
|
49
57
|
for the vector indexing pipeline.
|
|
50
58
|
"""
|
|
51
|
-
|
|
59
|
+
|
|
52
60
|
def __init__(
|
|
53
61
|
self,
|
|
54
62
|
project_root: Path,
|
|
@@ -57,7 +65,7 @@ class ChangeDetector:
|
|
|
57
65
|
):
|
|
58
66
|
"""
|
|
59
67
|
Initialize change detector.
|
|
60
|
-
|
|
68
|
+
|
|
61
69
|
Args:
|
|
62
70
|
project_root: Root directory to monitor
|
|
63
71
|
ignore_patterns: Patterns to ignore (glob-style)
|
|
@@ -65,54 +73,38 @@ class ChangeDetector:
|
|
|
65
73
|
"""
|
|
66
74
|
self.project_root = Path(project_root).resolve()
|
|
67
75
|
self.ignore_patterns = ignore_patterns or [
|
|
68
|
-
"*.log",
|
|
69
|
-
"
|
|
76
|
+
"*.log",
|
|
77
|
+
"*.tmp",
|
|
78
|
+
"*~",
|
|
79
|
+
".git/*",
|
|
80
|
+
"__pycache__/*",
|
|
81
|
+
"node_modules/*",
|
|
82
|
+
"*.pyc",
|
|
83
|
+
"*.pyo",
|
|
84
|
+
".DS_Store",
|
|
85
|
+
"Thumbs.db",
|
|
70
86
|
]
|
|
71
87
|
self.debounce_interval = debounce_interval
|
|
72
|
-
|
|
88
|
+
|
|
73
89
|
# Change tracking
|
|
74
90
|
self.recent_changes: List[FileChange] = []
|
|
75
91
|
self.pending_changes: Dict[str, FileChange] = {}
|
|
76
92
|
self.last_change_time: Dict[str, datetime] = {}
|
|
77
|
-
|
|
93
|
+
|
|
78
94
|
# Statistics
|
|
79
95
|
self.stats = ChangeStats(start_time=datetime.utcnow())
|
|
80
|
-
|
|
81
|
-
# Compile ignore patterns for performance
|
|
82
|
-
import fnmatch
|
|
83
|
-
self._compiled_patterns = [
|
|
84
|
-
fnmatch.translate(pattern) for pattern in self.ignore_patterns
|
|
85
|
-
]
|
|
86
|
-
|
|
87
|
-
def should_ignore_path(self, path: Path) -> bool:
|
|
88
|
-
"""Check if a path should be ignored based on patterns."""
|
|
89
|
-
try:
|
|
90
|
-
relative_path = path.relative_to(self.project_root)
|
|
91
|
-
path_str = str(relative_path)
|
|
92
|
-
|
|
93
|
-
import re
|
|
94
|
-
for pattern in self._compiled_patterns:
|
|
95
|
-
if re.match(pattern, path_str):
|
|
96
|
-
return True
|
|
97
|
-
|
|
98
|
-
return False
|
|
99
|
-
|
|
100
|
-
except ValueError:
|
|
101
|
-
# Path is not relative to project root
|
|
102
|
-
return True
|
|
103
|
-
|
|
96
|
+
|
|
104
97
|
def _should_debounce(self, file_path: str) -> bool:
|
|
105
98
|
"""Check if change should be debounced."""
|
|
106
99
|
now = datetime.utcnow()
|
|
107
|
-
|
|
108
100
|
if file_path in self.last_change_time:
|
|
109
101
|
elapsed = (now - self.last_change_time[file_path]).total_seconds()
|
|
110
102
|
if elapsed < self.debounce_interval:
|
|
111
103
|
return True
|
|
112
|
-
|
|
104
|
+
|
|
113
105
|
self.last_change_time[file_path] = now
|
|
114
106
|
return False
|
|
115
|
-
|
|
107
|
+
|
|
116
108
|
def _get_file_info(self, path: Path) -> Dict[str, Optional[int]]:
|
|
117
109
|
"""Get file information (size, etc.)."""
|
|
118
110
|
try:
|
|
@@ -123,35 +115,23 @@ class ChangeDetector:
|
|
|
123
115
|
return {"size": None}
|
|
124
116
|
except (OSError, PermissionError):
|
|
125
117
|
return {"size": None}
|
|
126
|
-
|
|
118
|
+
|
|
127
119
|
def _classify_change(
|
|
128
|
-
self,
|
|
129
|
-
path: Path,
|
|
130
|
-
event_type: str,
|
|
131
|
-
old_path: Optional[Path] = None
|
|
120
|
+
self, path: Path, event_type: str, old_path: Optional[Path] = None
|
|
132
121
|
) -> Optional[FileChange]:
|
|
133
122
|
"""Classify a file system event into a structured change."""
|
|
134
|
-
|
|
135
123
|
# Convert to relative path
|
|
136
124
|
try:
|
|
137
125
|
relative_path = str(path.relative_to(self.project_root))
|
|
138
126
|
except ValueError:
|
|
139
127
|
# Path outside project root
|
|
140
128
|
return None
|
|
141
|
-
|
|
129
|
+
|
|
142
130
|
# Check if should be ignored
|
|
143
|
-
if self.should_ignore_path(path):
|
|
131
|
+
if should_ignore_path(path, self.project_root, self.ignore_patterns):
|
|
144
132
|
logger.debug(f"Ignoring change to {relative_path} (matches ignore pattern)")
|
|
145
133
|
return None
|
|
146
|
-
|
|
147
|
-
# Check debouncing
|
|
148
|
-
if self._should_debounce(relative_path):
|
|
149
|
-
logger.debug(f"Debouncing change to {relative_path}")
|
|
150
|
-
return None
|
|
151
|
-
|
|
152
|
-
# Get file info
|
|
153
|
-
file_info = self._get_file_info(path)
|
|
154
|
-
|
|
134
|
+
|
|
155
135
|
# Map event types to change types
|
|
156
136
|
if event_type in ["created", "added"]:
|
|
157
137
|
change_type = ChangeType.CREATED
|
|
@@ -164,7 +144,15 @@ class ChangeDetector:
|
|
|
164
144
|
else:
|
|
165
145
|
logger.warning(f"Unknown event type: {event_type}")
|
|
166
146
|
return None
|
|
167
|
-
|
|
147
|
+
|
|
148
|
+
# Check debouncing
|
|
149
|
+
if self._should_debounce(relative_path):
|
|
150
|
+
logger.debug(f"Debouncing change to {relative_path}")
|
|
151
|
+
return None
|
|
152
|
+
|
|
153
|
+
# Get file info
|
|
154
|
+
file_info = self._get_file_info(path)
|
|
155
|
+
|
|
168
156
|
# Create change object
|
|
169
157
|
old_relative_path = None
|
|
170
158
|
if old_path:
|
|
@@ -172,44 +160,40 @@ class ChangeDetector:
|
|
|
172
160
|
old_relative_path = str(old_path.relative_to(self.project_root))
|
|
173
161
|
except ValueError:
|
|
174
162
|
pass
|
|
175
|
-
|
|
163
|
+
|
|
176
164
|
change = FileChange(
|
|
177
165
|
path=relative_path,
|
|
178
166
|
change_type=change_type,
|
|
179
167
|
timestamp=datetime.utcnow(),
|
|
180
168
|
old_path=old_relative_path,
|
|
181
169
|
size=file_info.get("size"),
|
|
182
|
-
hash=None # Will be computed later if needed
|
|
170
|
+
hash=None, # Will be computed later if needed
|
|
183
171
|
)
|
|
184
|
-
|
|
185
172
|
return change
|
|
186
|
-
|
|
173
|
+
|
|
187
174
|
def process_fs_event(
|
|
188
|
-
self,
|
|
189
|
-
event_type: str,
|
|
190
|
-
path: Path,
|
|
191
|
-
old_path: Optional[Path] = None
|
|
175
|
+
self, event_type: str, path: Path, old_path: Optional[Path] = None
|
|
192
176
|
) -> Optional[FileChange]:
|
|
193
177
|
"""
|
|
194
178
|
Process a file system event and return structured change.
|
|
195
|
-
|
|
179
|
+
|
|
196
180
|
Args:
|
|
197
181
|
event_type: Type of event (created, modified, deleted, moved)
|
|
198
182
|
path: Path that changed
|
|
199
183
|
old_path: Old path (for moves)
|
|
200
|
-
|
|
184
|
+
|
|
201
185
|
Returns:
|
|
202
186
|
FileChange object or None if ignored
|
|
203
187
|
"""
|
|
204
188
|
change = self._classify_change(path, event_type, old_path)
|
|
205
|
-
|
|
189
|
+
|
|
206
190
|
if change:
|
|
207
191
|
self.recent_changes.append(change)
|
|
208
|
-
|
|
192
|
+
|
|
209
193
|
# Update statistics
|
|
210
194
|
self.stats.total_changes += 1
|
|
211
195
|
self.stats.last_change = change.timestamp
|
|
212
|
-
|
|
196
|
+
|
|
213
197
|
if change.change_type == ChangeType.CREATED:
|
|
214
198
|
self.stats.creates += 1
|
|
215
199
|
elif change.change_type == ChangeType.MODIFIED:
|
|
@@ -218,95 +202,127 @@ class ChangeDetector:
|
|
|
218
202
|
self.stats.deletions += 1
|
|
219
203
|
elif change.change_type == ChangeType.MOVED:
|
|
220
204
|
self.stats.moves += 1
|
|
221
|
-
|
|
205
|
+
|
|
222
206
|
logger.info(f"Detected change: {change.change_type.value} {change.path}")
|
|
223
|
-
|
|
207
|
+
|
|
224
208
|
return change
|
|
225
|
-
|
|
209
|
+
|
|
226
210
|
def get_recent_changes(
|
|
227
211
|
self,
|
|
228
212
|
limit: Optional[int] = None,
|
|
229
|
-
change_types: Optional[List[ChangeType]] = None
|
|
213
|
+
change_types: Optional[List[ChangeType]] = None,
|
|
230
214
|
) -> List[FileChange]:
|
|
231
215
|
"""
|
|
232
216
|
Get recent changes with optional filtering.
|
|
233
|
-
|
|
217
|
+
|
|
234
218
|
Args:
|
|
235
219
|
limit: Maximum number of changes to return
|
|
236
220
|
change_types: Filter by change types
|
|
237
|
-
|
|
221
|
+
|
|
238
222
|
Returns:
|
|
239
223
|
List of recent changes
|
|
240
224
|
"""
|
|
241
225
|
changes = self.recent_changes
|
|
242
|
-
|
|
226
|
+
|
|
243
227
|
# Filter by change types
|
|
244
228
|
if change_types:
|
|
245
229
|
changes = [c for c in changes if c.change_type in change_types]
|
|
246
|
-
|
|
230
|
+
|
|
247
231
|
# Sort by timestamp (most recent first)
|
|
248
232
|
changes = sorted(changes, key=lambda c: c.timestamp, reverse=True)
|
|
249
|
-
|
|
233
|
+
|
|
250
234
|
# Apply limit
|
|
251
235
|
if limit:
|
|
252
236
|
changes = changes[:limit]
|
|
253
|
-
|
|
237
|
+
|
|
254
238
|
return changes
|
|
255
|
-
|
|
239
|
+
|
|
256
240
|
def clear_recent_changes(self) -> int:
|
|
257
241
|
"""Clear recent changes and return count cleared."""
|
|
258
242
|
count = len(self.recent_changes)
|
|
259
243
|
self.recent_changes.clear()
|
|
260
244
|
return count
|
|
261
|
-
|
|
245
|
+
|
|
262
246
|
def get_changes_since(self, since: datetime) -> List[FileChange]:
|
|
263
247
|
"""Get all changes since a specific timestamp."""
|
|
264
|
-
return [
|
|
265
|
-
change for change in self.recent_changes
|
266
|
-
if change.timestamp >= since
|
|
267
|
-
]
|
|
268
|
-
|
|
248
|
+
return [change for change in self.recent_changes if change.timestamp >= since]
|
|
249
|
+
|
|
269
250
|
def get_stats(self) -> ChangeStats:
|
|
270
251
|
"""Get change detection statistics."""
|
|
271
252
|
return self.stats
|
|
272
|
-
|
|
253
|
+
|
|
273
254
|
def reset_stats(self) -> None:
|
|
274
255
|
"""Reset change detection statistics."""
|
|
275
256
|
self.stats = ChangeStats(start_time=datetime.utcnow())
|
|
276
|
-
|
|
257
|
+
|
|
277
258
|
def get_changed_files(self, since: Optional[datetime] = None) -> Set[str]:
|
|
278
259
|
"""Get set of file paths that have changed."""
|
|
279
260
|
changes = self.recent_changes
|
|
280
|
-
|
|
261
|
+
|
|
281
262
|
if since:
|
|
282
263
|
changes = [c for c in changes if c.timestamp >= since]
|
|
283
|
-
|
|
264
|
+
|
|
284
265
|
# Collect unique file paths
|
|
285
266
|
changed_files = set()
|
|
286
267
|
for change in changes:
|
|
287
268
|
changed_files.add(change.path)
|
|
288
269
|
if change.old_path: # For moves
|
|
289
270
|
changed_files.add(change.old_path)
|
|
290
|
-
|
|
271
|
+
|
|
291
272
|
return changed_files
|
|
292
|
-
|
|
273
|
+
|
|
293
274
|
def is_code_file(self, path: str) -> bool:
|
|
294
275
|
"""Check if a file is likely a code file."""
|
|
295
276
|
code_extensions = {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
277
|
+
".py",
|
|
278
|
+
".js",
|
|
279
|
+
".ts",
|
|
280
|
+
".jsx",
|
|
281
|
+
".tsx",
|
|
282
|
+
".java",
|
|
283
|
+
".cpp",
|
|
284
|
+
".c",
|
|
285
|
+
".h",
|
|
286
|
+
".cs",
|
|
287
|
+
".php",
|
|
288
|
+
".rb",
|
|
289
|
+
".go",
|
|
290
|
+
".rs",
|
|
291
|
+
".swift",
|
|
292
|
+
".kt",
|
|
293
|
+
".scala",
|
|
294
|
+
".clj",
|
|
295
|
+
".cljs",
|
|
296
|
+
".hs",
|
|
297
|
+
".ml",
|
|
298
|
+
".fs",
|
|
299
|
+
".ex",
|
|
300
|
+
".exs",
|
|
301
|
+
".cr",
|
|
302
|
+
".dart",
|
|
303
|
+
".lua",
|
|
304
|
+
".pl",
|
|
305
|
+
".sh",
|
|
306
|
+
".bash",
|
|
307
|
+
".zsh",
|
|
308
|
+
".fish",
|
|
309
|
+
".sql",
|
|
310
|
+
".r",
|
|
311
|
+
".m",
|
|
312
|
+
".mm",
|
|
313
|
+
".vim",
|
|
314
|
+
".el",
|
|
315
|
+
".lisp",
|
|
316
|
+
".scm",
|
|
301
317
|
}
|
|
302
|
-
|
|
318
|
+
|
|
303
319
|
return Path(path).suffix.lower() in code_extensions
|
|
304
|
-
|
|
320
|
+
|
|
305
321
|
def get_code_changes(self, since: Optional[datetime] = None) -> List[FileChange]:
|
|
306
322
|
"""Get changes to code files only."""
|
|
307
323
|
changes = self.get_recent_changes()
|
|
308
|
-
|
|
324
|
+
|
|
309
325
|
if since:
|
|
310
326
|
changes = [c for c in changes if c.timestamp >= since]
|
|
311
|
-
|
|
327
|
+
|
|
312
328
|
return [c for c in changes if self.is_code_file(c.path)]
|