mcp-vector-search 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mcp-vector-search may be problematic; see the registry's advisory page for details.

Files changed (35)
  1. mcp_vector_search/__init__.py +9 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/config.py +303 -0
  5. mcp_vector_search/cli/commands/index.py +304 -0
  6. mcp_vector_search/cli/commands/init.py +212 -0
  7. mcp_vector_search/cli/commands/search.py +395 -0
  8. mcp_vector_search/cli/commands/status.py +340 -0
  9. mcp_vector_search/cli/commands/watch.py +288 -0
  10. mcp_vector_search/cli/main.py +117 -0
  11. mcp_vector_search/cli/output.py +242 -0
  12. mcp_vector_search/config/__init__.py +1 -0
  13. mcp_vector_search/config/defaults.py +175 -0
  14. mcp_vector_search/config/settings.py +108 -0
  15. mcp_vector_search/core/__init__.py +1 -0
  16. mcp_vector_search/core/database.py +431 -0
  17. mcp_vector_search/core/embeddings.py +250 -0
  18. mcp_vector_search/core/exceptions.py +66 -0
  19. mcp_vector_search/core/indexer.py +310 -0
  20. mcp_vector_search/core/models.py +174 -0
  21. mcp_vector_search/core/project.py +304 -0
  22. mcp_vector_search/core/search.py +324 -0
  23. mcp_vector_search/core/watcher.py +320 -0
  24. mcp_vector_search/mcp/__init__.py +1 -0
  25. mcp_vector_search/parsers/__init__.py +1 -0
  26. mcp_vector_search/parsers/base.py +180 -0
  27. mcp_vector_search/parsers/javascript.py +238 -0
  28. mcp_vector_search/parsers/python.py +407 -0
  29. mcp_vector_search/parsers/registry.py +187 -0
  30. mcp_vector_search/py.typed +1 -0
  31. mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
  32. mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
  33. mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
  34. mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
  35. mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,320 @@
1
+ """File system watcher for incremental indexing."""
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+ from typing import List, Optional, Set, Callable, Awaitable, Union
6
+ from threading import Thread
7
+ import time
8
+ from concurrent.futures import Future
9
+
10
+ from loguru import logger
11
+ from watchdog.observers import Observer
12
+ from watchdog.events import FileSystemEventHandler, FileSystemEvent
13
+
14
+ from ..config.settings import ProjectConfig
15
+ from .indexer import SemanticIndexer
16
+ from .database import ChromaVectorDatabase
17
+
18
+
19
class CodeFileHandler(FileSystemEventHandler):
    """Handler for code file changes.

    Watchdog invokes the ``on_*`` callbacks on its own observer thread. All
    mutable debounce state (``pending_changes``, ``last_change_time``,
    ``debounce_task``) is therefore confined to the asyncio event loop thread
    via ``loop.call_soon_threadsafe``, so no locking is required.

    Fix over the previous version: ``_schedule_change`` used to mutate the
    pending-change set directly from the watchdog thread while
    ``_debounced_process`` read and cleared it on the loop thread — an
    unsynchronized race in which changes arriving between ``copy()`` and
    ``clear()`` could be silently dropped.
    """

    def __init__(
        self,
        file_extensions: List[str],
        ignore_patterns: List[str],
        callback: Callable[[str, str], Awaitable[None]],
        loop: asyncio.AbstractEventLoop,
        debounce_delay: float = 1.0,
    ):
        """Initialize file handler.

        Args:
            file_extensions: List of file extensions to watch
            ignore_patterns: List of patterns to ignore
            callback: Async callback function for file changes
            loop: Event loop to schedule tasks on
            debounce_delay: Delay in seconds to debounce rapid changes
        """
        super().__init__()
        self.file_extensions = set(file_extensions)
        self.ignore_patterns = ignore_patterns
        self.callback = callback
        self.loop = loop
        self.debounce_delay = debounce_delay
        # Only read/written on the event loop thread (see _register_change).
        self.pending_changes: Set[str] = set()
        self.last_change_time: float = 0
        self.debounce_task: Optional[asyncio.Task] = None

    def should_process_file(self, file_path: str) -> bool:
        """Check if file should be processed.

        A file qualifies when its suffix is watched and no ignore pattern
        occurs anywhere in its path string.
        """
        path = Path(file_path)

        # Check file extension
        if path.suffix not in self.file_extensions:
            return False

        # Check ignore patterns. NOTE: substring match, so a pattern like
        # "venv" also filters paths such as "my_venv_backup" — coarse but
        # cheap filtering, preserved from the original behavior.
        for pattern in self.ignore_patterns:
            if pattern in str(path):
                return False

        return True

    def on_modified(self, event: FileSystemEvent) -> None:
        """Handle file modification."""
        if not event.is_directory and self.should_process_file(event.src_path):
            self._schedule_change(event.src_path, "modified")

    def on_created(self, event: FileSystemEvent) -> None:
        """Handle file creation."""
        if not event.is_directory and self.should_process_file(event.src_path):
            self._schedule_change(event.src_path, "created")

    def on_deleted(self, event: FileSystemEvent) -> None:
        """Handle file deletion."""
        if not event.is_directory and self.should_process_file(event.src_path):
            self._schedule_change(event.src_path, "deleted")

    def on_moved(self, event: FileSystemEvent) -> None:
        """Handle file move/rename as a delete of the source plus a create."""
        if hasattr(event, 'dest_path'):
            # Handle rename/move
            if not event.is_directory:
                if self.should_process_file(event.src_path):
                    self._schedule_change(event.src_path, "deleted")
                if self.should_process_file(event.dest_path):
                    self._schedule_change(event.dest_path, "created")

    def _schedule_change(self, file_path: str, change_type: str) -> None:
        """Schedule a file change for processing with debouncing.

        Called from the watchdog observer thread; all bookkeeping is
        marshalled onto the event loop thread to avoid data races.
        """
        self.loop.call_soon_threadsafe(
            self._register_change, f"{change_type}:{file_path}"
        )

    def _register_change(self, change_key: str) -> None:
        """Record a pending change and restart the debounce window.

        Runs on the event loop thread only.

        Args:
            change_key: "change_type:file_path" key for the pending set
        """
        self.pending_changes.add(change_key)
        self.last_change_time = time.time()

        # Restart the debounce window: cancel the old task, start a new one.
        if self.debounce_task and not self.debounce_task.done():
            self.debounce_task.cancel()
        self.debounce_task = self.loop.create_task(self._debounced_process())

    async def _debounced_process(self) -> None:
        """Process pending changes after the debounce delay elapses."""
        await asyncio.sleep(self.debounce_delay)

        # Defensive: if another change slipped in during the sleep, a newer
        # task owns the pending set; let it do the work.
        if time.time() - self.last_change_time < self.debounce_delay:
            return

        # Process all pending changes
        changes = self.pending_changes.copy()
        self.pending_changes.clear()

        for change in changes:
            change_type, file_path = change.split(":", 1)
            try:
                await self.callback(file_path, change_type)
            except Exception as e:
                logger.error(f"Error processing file change {file_path}: {e}")
123
+
124
+
125
class FileWatcher:
    """File system watcher for incremental indexing.

    Wires a watchdog observer to the semantic indexer so that file
    creations/modifications are re-indexed and deletions are purged from the
    vector database.
    """

    def __init__(
        self,
        project_root: Path,
        config: ProjectConfig,
        indexer: SemanticIndexer,
        database: ChromaVectorDatabase,
    ):
        """Initialize file watcher.

        Args:
            project_root: Root directory to watch
            config: Project configuration
            indexer: Semantic indexer instance
            database: Vector database instance
        """
        self.project_root = project_root
        self.config = config
        self.indexer = indexer
        self.database = database
        self.observer: Optional[Observer] = None
        self.handler: Optional[CodeFileHandler] = None
        self.is_running = False

    async def start(self) -> None:
        """Start watching for file changes.

        No-op (with a warning) if the watcher is already running. Must be
        called from within a running event loop.
        """
        if self.is_running:
            logger.warning("File watcher is already running")
            return

        logger.info(f"Starting file watcher for {self.project_root}")

        # The handler needs the running loop so it can marshal watchdog
        # thread events back onto asyncio.
        loop = asyncio.get_running_loop()
        self.handler = CodeFileHandler(
            file_extensions=self.config.file_extensions,
            ignore_patterns=self._get_ignore_patterns(),
            callback=self._handle_file_change,
            loop=loop,
            debounce_delay=1.0,
        )

        # Create observer and watch the project tree recursively.
        self.observer = Observer()
        self.observer.schedule(
            self.handler,
            str(self.project_root),
            recursive=True
        )

        # The observer runs in its own thread.
        self.observer.start()
        self.is_running = True

        logger.info("File watcher started successfully")

    async def stop(self) -> None:
        """Stop watching for file changes.

        Fix: ``Observer.join()`` is a blocking thread join; it previously ran
        directly on the event loop and could stall every other task during
        shutdown. It now runs in a worker thread via ``asyncio.to_thread``.
        """
        if not self.is_running:
            return

        logger.info("Stopping file watcher")

        # Cancel any in-flight debounce work so no callback fires after stop.
        if self.handler and self.handler.debounce_task:
            self.handler.debounce_task.cancel()

        if self.observer:
            self.observer.stop()
            # Blocking join -> worker thread, keeping the loop responsive.
            await asyncio.to_thread(self.observer.join)
            self.observer = None

        self.handler = None
        self.is_running = False

        logger.info("File watcher stopped")

    def _get_ignore_patterns(self) -> List[str]:
        """Get patterns to ignore during watching."""
        default_patterns = [
            ".git", ".svn", ".hg",
            "__pycache__", ".pytest_cache",
            "node_modules", ".venv", "venv",
            ".DS_Store", "Thumbs.db",
            ".idea", ".vscode",
            "build", "dist", "target",
            ".mcp-vector-search",  # Ignore our own index directory
        ]

        # Add any custom ignore patterns from config
        # TODO: Add custom ignore patterns to config
        return default_patterns

    async def _handle_file_change(self, file_path: str, change_type: str) -> None:
        """Handle a file change event.

        Args:
            file_path: Path to the changed file
            change_type: Type of change (created, modified, deleted)
        """
        path = Path(file_path)
        logger.debug(f"Processing file change: {change_type} {path}")

        try:
            if change_type == "deleted":
                # Remove chunks for deleted file
                await self._remove_file_chunks(path)
            elif change_type in ("created", "modified"):
                # Re-index the file
                await self._reindex_file(path)

            logger.info(f"Processed {change_type} for {path.name}")

        except Exception as e:
            # Best-effort: one failed file must not kill the watcher.
            logger.error(f"Failed to process {change_type} for {path}: {e}")

    async def _remove_file_chunks(self, file_path: Path) -> None:
        """Remove all chunks for a deleted file.

        Chunks are keyed by project-relative path; fall back to the absolute
        path when the file lies outside the project root.
        """
        try:
            relative_path = file_path.relative_to(self.project_root)
        except ValueError:
            relative_path = file_path

        await self.database.remove_file_chunks(str(relative_path))
        logger.debug(f"Removed chunks for deleted file: {relative_path}")

    async def _reindex_file(self, file_path: Path) -> None:
        """Re-index a single file (remove stale chunks, then index anew)."""
        if not file_path.exists():
            # The file may have been deleted again during the debounce window.
            logger.warning(f"File no longer exists: {file_path}")
            return

        # Remove existing chunks first so we never keep stale entries.
        await self._remove_file_chunks(file_path)

        chunks_indexed = await self.indexer.index_file(file_path)
        logger.debug(f"Re-indexed {file_path.name}: {chunks_indexed} chunks")

    async def __aenter__(self):
        """Async context manager entry: start watching."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: stop watching."""
        await self.stop()
272
+
273
+
274
class WatcherManager:
    """Registry of active :class:`FileWatcher` instances, keyed by project root."""

    def __init__(self):
        """Initialize an empty watcher registry."""
        self.watchers: dict[str, FileWatcher] = {}

    async def start_watcher(
        self,
        project_root: Path,
        config: ProjectConfig,
        indexer: SemanticIndexer,
        database: ChromaVectorDatabase,
    ) -> FileWatcher:
        """Start a watcher for a project, reusing any already-running one."""
        key = str(project_root)

        existing = self.watchers.get(key)
        if existing is not None:
            logger.warning(f"Watcher already exists for {project_root}")
            return existing

        new_watcher = FileWatcher(project_root, config, indexer, database)
        await new_watcher.start()

        self.watchers[key] = new_watcher
        return new_watcher

    async def stop_watcher(self, project_root: Path) -> None:
        """Stop and forget the watcher for a project, if one exists."""
        key = str(project_root)

        watcher = self.watchers.pop(key, None)
        if watcher is None:
            logger.warning(f"No watcher found for {project_root}")
            return

        await watcher.stop()

    async def stop_all(self) -> None:
        """Stop every active watcher and clear the registry."""
        for active in list(self.watchers.values()):
            await active.stop()
        self.watchers.clear()

    def is_watching(self, project_root: Path) -> bool:
        """Return True when a watcher is registered for the given project."""
        return str(project_root) in self.watchers
@@ -0,0 +1 @@
1
+ """MCP server integration for MCP Vector Search."""
@@ -0,0 +1 @@
1
+ """Language parsers for MCP Vector Search."""
@@ -0,0 +1,180 @@
1
+ """Base parser interface for MCP Vector Search."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ from ..core.models import CodeChunk
8
+
9
+
10
class BaseParser(ABC):
    """Abstract base class for language parsers.

    Concrete subclasses implement parsing for one language and report the
    file extensions they handle.
    """

    def __init__(self, language: str) -> None:
        """Initialize parser for a specific language.

        Args:
            language: Programming language name
        """
        self.language = language

    @abstractmethod
    async def parse_file(self, file_path: Path) -> List[CodeChunk]:
        """Parse a file and extract code chunks.

        Args:
            file_path: Path to the file to parse

        Returns:
            List of code chunks extracted from the file
        """
        ...

    @abstractmethod
    async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
        """Parse content and extract code chunks.

        Args:
            content: File content to parse
            file_path: Path to the source file (for metadata)

        Returns:
            List of code chunks extracted from the content
        """
        ...

    def supports_file(self, file_path: Path) -> bool:
        """Report whether this parser can handle the given file.

        The check is purely extension-based and case-insensitive.

        Args:
            file_path: Path to check

        Returns:
            True if this parser can handle the file
        """
        supported = self.get_supported_extensions()
        return file_path.suffix.lower() in supported

    @abstractmethod
    def get_supported_extensions(self) -> List[str]:
        """Get list of file extensions supported by this parser.

        Returns:
            List of file extensions (including the dot)
        """
        ...

    def _create_chunk(
        self,
        content: str,
        file_path: Path,
        start_line: int,
        end_line: int,
        chunk_type: str = "code",
        function_name: Optional[str] = None,
        class_name: Optional[str] = None,
        docstring: Optional[str] = None,
    ) -> CodeChunk:
        """Build a CodeChunk carrying this parser's language plus metadata.

        Args:
            content: Code content (surrounding whitespace is stripped)
            file_path: Source file path
            start_line: Starting line number (1-based)
            end_line: Ending line number (1-based)
            chunk_type: Kind of chunk (code, function, class, ...)
            function_name: Enclosing function name, if any
            class_name: Enclosing class name, if any
            docstring: Associated docstring, if any

        Returns:
            CodeChunk instance
        """
        return CodeChunk(
            content=content.strip(),
            file_path=file_path,
            start_line=start_line,
            end_line=end_line,
            language=self.language,
            chunk_type=chunk_type,
            function_name=function_name,
            class_name=class_name,
            docstring=docstring,
        )

    def _split_into_lines(self, content: str) -> List[str]:
        """Split content into lines, keeping the line endings attached.

        Args:
            content: Content to split

        Returns:
            List of lines
        """
        return content.splitlines(keepends=True)

    def _get_line_range(self, lines: List[str], start_line: int, end_line: int) -> str:
        """Join the 1-based, inclusive line range [start_line, end_line].

        Out-of-range bounds are clamped to the available lines.

        Args:
            lines: List of lines
            start_line: Starting line number (1-based)
            end_line: Ending line number (1-based)

        Returns:
            Content for the specified line range
        """
        first = max(0, start_line - 1)  # 1-based -> 0-based
        last = min(len(lines), end_line)
        return "".join(lines[first:last])
131
+
132
+
133
class FallbackParser(BaseParser):
    """Fallback parser for unsupported languages using simple text chunking."""

    # Number of lines per chunk for the naive text splitter.
    CHUNK_SIZE = 50

    def __init__(self, language: str = "text") -> None:
        """Initialize fallback parser."""
        super().__init__(language)

    def supports_file(self, file_path: Path) -> bool:
        """The fallback parser accepts any file.

        Fix: the inherited check compared the file's real suffix against the
        "*" sentinel returned by get_supported_extensions(), which can never
        match, so supports_file() always returned False for this parser.
        """
        return True

    async def parse_file(self, file_path: Path) -> List[CodeChunk]:
        """Parse file using simple text chunking.

        Returns an empty list when the file cannot be read or decoded
        (best-effort behavior, preserved from the original).
        """
        try:
            content = file_path.read_text(encoding="utf-8")
            return await self.parse_content(content, file_path)
        except Exception:
            # Return empty list if file can't be read
            return []

    async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
        """Parse content using simple text chunking.

        Splits the text into consecutive CHUNK_SIZE-line chunks, skipping
        chunks that contain only whitespace.
        """
        if not content.strip():
            return []

        lines = self._split_into_lines(content)
        chunks: List[CodeChunk] = []

        for i in range(0, len(lines), self.CHUNK_SIZE):
            start_line = i + 1
            end_line = min(i + self.CHUNK_SIZE, len(lines))

            chunk_content = self._get_line_range(lines, start_line, end_line)

            if chunk_content.strip():
                chunks.append(
                    self._create_chunk(
                        content=chunk_content,
                        file_path=file_path,
                        start_line=start_line,
                        end_line=end_line,
                        chunk_type="text",
                    )
                )

        return chunks

    def get_supported_extensions(self) -> List[str]:
        """Fallback parser supports all extensions ("*" is a wildcard marker)."""
        return ["*"]  # Special marker for "all extensions"