mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (49) hide show
  1. mcp_vector_search/__init__.py +3 -2
  2. mcp_vector_search/cli/commands/auto_index.py +397 -0
  3. mcp_vector_search/cli/commands/config.py +88 -40
  4. mcp_vector_search/cli/commands/index.py +198 -52
  5. mcp_vector_search/cli/commands/init.py +471 -58
  6. mcp_vector_search/cli/commands/install.py +284 -0
  7. mcp_vector_search/cli/commands/mcp.py +495 -0
  8. mcp_vector_search/cli/commands/search.py +241 -87
  9. mcp_vector_search/cli/commands/status.py +184 -58
  10. mcp_vector_search/cli/commands/watch.py +34 -35
  11. mcp_vector_search/cli/didyoumean.py +184 -0
  12. mcp_vector_search/cli/export.py +320 -0
  13. mcp_vector_search/cli/history.py +292 -0
  14. mcp_vector_search/cli/interactive.py +342 -0
  15. mcp_vector_search/cli/main.py +175 -27
  16. mcp_vector_search/cli/output.py +63 -45
  17. mcp_vector_search/config/defaults.py +50 -36
  18. mcp_vector_search/config/settings.py +49 -35
  19. mcp_vector_search/core/auto_indexer.py +298 -0
  20. mcp_vector_search/core/connection_pool.py +322 -0
  21. mcp_vector_search/core/database.py +335 -25
  22. mcp_vector_search/core/embeddings.py +73 -29
  23. mcp_vector_search/core/exceptions.py +19 -2
  24. mcp_vector_search/core/factory.py +310 -0
  25. mcp_vector_search/core/git_hooks.py +345 -0
  26. mcp_vector_search/core/indexer.py +237 -73
  27. mcp_vector_search/core/models.py +21 -19
  28. mcp_vector_search/core/project.py +73 -58
  29. mcp_vector_search/core/scheduler.py +330 -0
  30. mcp_vector_search/core/search.py +574 -86
  31. mcp_vector_search/core/watcher.py +48 -46
  32. mcp_vector_search/mcp/__init__.py +4 -0
  33. mcp_vector_search/mcp/__main__.py +25 -0
  34. mcp_vector_search/mcp/server.py +701 -0
  35. mcp_vector_search/parsers/base.py +30 -31
  36. mcp_vector_search/parsers/javascript.py +74 -48
  37. mcp_vector_search/parsers/python.py +57 -49
  38. mcp_vector_search/parsers/registry.py +47 -32
  39. mcp_vector_search/parsers/text.py +179 -0
  40. mcp_vector_search/utils/__init__.py +40 -0
  41. mcp_vector_search/utils/gitignore.py +229 -0
  42. mcp_vector_search/utils/timing.py +334 -0
  43. mcp_vector_search/utils/version.py +47 -0
  44. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/METADATA +173 -7
  45. mcp_vector_search-0.4.12.dist-info/RECORD +54 -0
  46. mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
  47. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/WHEEL +0 -0
  48. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/entry_points.txt +0 -0
  49. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,322 @@
1
+ """Connection pooling for vector database operations."""
2
+
3
+ import asyncio
4
+ import time
5
+ from collections.abc import AsyncGenerator
6
+ from contextlib import asynccontextmanager
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from loguru import logger
12
+
13
+ from .exceptions import DatabaseError
14
+
15
+
16
@dataclass
class PooledConnection:
    """A single database client/collection pair tracked by the pool.

    Besides the two handles, each entry carries the bookkeeping the pool
    needs to decide whether the connection can be handed out again.
    """

    # Database client handle (opaque to this class).
    client: Any
    # Collection handle obtained from ``client``.
    collection: Any
    # ``time.time()`` timestamp taken when the connection was created.
    created_at: float
    # ``time.time()`` timestamp of the most recent acquire or release.
    last_used: float
    # True while a caller currently holds this connection.
    in_use: bool = False
    # Number of times the connection has been handed out.
    use_count: int = 0
26
+
27
+
28
class ChromaConnectionPool:
    """Connection pool for ChromaDB operations.

    The pool keeps a list of :class:`PooledConnection` entries guarded by a
    single ``asyncio.Lock``. Callers borrow a connection through
    :meth:`get_connection`; a background task periodically drops idle
    connections that are too old or have been idle too long and tops the
    pool back up to ``min_connections``.
    """

    # Seconds to sleep between attempts while waiting for a free connection.
    _ACQUIRE_POLL_INTERVAL = 0.1
    # Seconds between background sweeps for expired connections.
    _CLEANUP_INTERVAL = 60.0

    def __init__(
        self,
        persist_directory: Path,
        embedding_function: Any,
        collection_name: str = "code_search",
        max_connections: int = 10,
        min_connections: int = 2,
        max_idle_time: float = 300.0,  # 5 minutes
        max_connection_age: float = 3600.0,  # 1 hour
        acquire_timeout: float = 30.0,
    ):
        """Initialize connection pool.

        No connections are created here; see :meth:`initialize`.

        Args:
            persist_directory: Directory to persist database
            embedding_function: Function to generate embeddings
            collection_name: Name of the collection
            max_connections: Maximum number of connections in pool
            min_connections: Minimum number of connections to maintain
            max_idle_time: Maximum time a connection can be idle (seconds)
            max_connection_age: Maximum age of a connection (seconds)
            acquire_timeout: Maximum time to wait for a free connection
                when the pool is exhausted (seconds)
        """
        self.persist_directory = persist_directory
        self.embedding_function = embedding_function
        self.collection_name = collection_name
        self.max_connections = max_connections
        self.min_connections = min_connections
        self.max_idle_time = max_idle_time
        self.max_connection_age = max_connection_age
        self.acquire_timeout = acquire_timeout

        self._pool: list[PooledConnection] = []
        self._lock = asyncio.Lock()
        self._initialized = False
        self._cleanup_task: asyncio.Task | None = None

        # Statistics
        self._stats = {
            "connections_created": 0,
            "connections_reused": 0,
            "connections_expired": 0,
            "pool_hits": 0,
            "pool_misses": 0,
        }

    async def initialize(self) -> None:
        """Create the minimum number of connections and start the cleanup task.

        Calling this on an already initialized pool is a no-op.

        Raises:
            DatabaseError: If a connection cannot be created. In that case
                the pool is left unchanged and uninitialized.
        """
        if self._initialized:
            return

        async with self._lock:
            # Re-check under the lock: another task may have completed
            # initialization while this one was waiting for it.
            if self._initialized:
                return

            # Build the connections in a local list first so that a failure
            # part-way through does not leave a half-populated pool that a
            # retry would then grow past ``min_connections``.
            initial: list[PooledConnection] = []
            for _ in range(self.min_connections):
                initial.append(await self._create_connection())
            self._pool.extend(initial)

            # Start cleanup task
            self._cleanup_task = asyncio.create_task(self._cleanup_loop())
            self._initialized = True

            logger.info(
                f"Connection pool initialized with {len(self._pool)} connections"
            )

    async def close(self) -> None:
        """Stop the cleanup task and drop every pooled connection.

        Calling this on a pool that is not initialized is a no-op.
        """
        if not self._initialized:
            return

        async with self._lock:
            # Cancel cleanup task
            if self._cleanup_task is not None:
                self._cleanup_task.cancel()
                try:
                    await self._cleanup_task
                except asyncio.CancelledError:
                    pass
                # Drop the finished task so a later initialize() starts clean.
                self._cleanup_task = None

            # Close all connections
            for conn in self._pool:
                await self._close_connection(conn)

            self._pool.clear()
            self._initialized = False

            logger.info("Connection pool closed")

    async def _create_connection(self) -> PooledConnection:
        """Create a new database connection.

        Returns:
            A fresh, not-in-use ``PooledConnection``. The caller is
            responsible for adding it to the pool.

        Raises:
            DatabaseError: If anything fails while creating the client or
                collection (the original exception is chained).
        """
        try:
            # Imported lazily so the module can be imported without chromadb.
            import chromadb

            # Ensure directory exists
            self.persist_directory.mkdir(parents=True, exist_ok=True)

            # Create client
            client = chromadb.PersistentClient(
                path=str(self.persist_directory),
                settings=chromadb.Settings(
                    anonymized_telemetry=False,
                    allow_reset=True,
                ),
            )

            # Create or get collection
            collection = client.get_or_create_collection(
                name=self.collection_name,
                embedding_function=self.embedding_function,
                metadata={
                    "description": "Semantic code search collection",
                },
            )

            now = time.time()
            conn = PooledConnection(
                client=client,
                collection=collection,
                created_at=now,
                last_used=now,
            )

            self._stats["connections_created"] += 1
            logger.debug(
                f"Created new database connection (total: {self._stats['connections_created']})"
            )

            return conn

        except Exception as e:
            logger.error(f"Failed to create database connection: {e}")
            raise DatabaseError(f"Connection creation failed: {e}") from e

    async def _close_connection(self, conn: PooledConnection) -> None:
        """Close a database connection by dropping its handles.

        Clearing ``client``/``collection`` also makes
        :meth:`_is_connection_valid` reject the connection afterwards.
        """
        # ChromaDB doesn't require explicit closing
        conn.client = None
        conn.collection = None
        logger.debug("Closed database connection")

    @asynccontextmanager
    async def get_connection(self) -> AsyncGenerator[PooledConnection, None]:
        """Borrow a connection from the pool for the duration of a ``with`` block.

        The pool is initialized on first use. The connection is returned to
        the pool when the block exits, whether normally or via an exception.

        Yields:
            The acquired ``PooledConnection``.

        Raises:
            DatabaseError: If no connection could be acquired or created.
        """
        if not self._initialized:
            await self.initialize()

        conn = None
        try:
            # Get connection from pool
            conn = await self._acquire_connection()
            yield conn
        finally:
            # Return connection to pool (nothing to return if acquire failed)
            if conn is not None:
                await self._release_connection(conn)

    async def _acquire_connection(self) -> PooledConnection:
        """Acquire a connection from the pool.

        Preference order: reuse a valid idle connection, otherwise create a
        new one if the pool is below ``max_connections``, otherwise wait up
        to ``acquire_timeout`` seconds for one to become available.

        Raises:
            DatabaseError: If the wait times out or a new connection cannot
                be created.
        """
        # Monotonic clock: the deadline must not move if the wall clock does.
        deadline = time.monotonic() + self.acquire_timeout
        waiting = False

        while True:
            async with self._lock:
                conn = self._claim_idle_connection()
                if conn is not None:
                    self._stats["connections_reused"] += 1
                    if not waiting:
                        # Only an immediate reuse counts as a hit; a reuse
                        # after waiting was already recorded as a miss.
                        self._stats["pool_hits"] += 1
                    logger.debug(f"Reused connection (use count: {conn.use_count})")
                    return conn

                # Expired idle connections still occupy pool slots; evict
                # them so they cannot prevent creating a usable connection.
                await self._evict_expired_connections()

                # No available connection, create new one if under limit
                if len(self._pool) < self.max_connections:
                    conn = await self._create_connection()
                    self._mark_in_use(conn)
                    self._pool.append(conn)
                    if not waiting:
                        self._stats["pool_misses"] += 1
                    logger.debug(f"Created new connection (pool size: {len(self._pool)})")
                    return conn

                # Pool is full, wait for a connection to become available
                if not waiting:
                    waiting = True
                    self._stats["pool_misses"] += 1
                    logger.warning(
                        "Connection pool exhausted, waiting for available connection"
                    )

            # The lock is released here on purpose: _release_connection()
            # needs it, so sleeping while holding it would make it
            # impossible for any connection to ever be returned.
            if time.monotonic() >= deadline:
                raise DatabaseError("Connection pool timeout: no connections available")
            await asyncio.sleep(self._ACQUIRE_POLL_INTERVAL)

    def _claim_idle_connection(self) -> "PooledConnection | None":
        """Mark the first valid idle connection as in use and return it.

        Must be called with ``self._lock`` held. Returns ``None`` when no
        idle connection passes :meth:`_is_connection_valid`.
        """
        for conn in self._pool:
            if not conn.in_use and self._is_connection_valid(conn):
                self._mark_in_use(conn)
                return conn
        return None

    def _mark_in_use(self, conn: PooledConnection) -> None:
        """Record that ``conn`` is being handed out to a caller."""
        conn.in_use = True
        conn.last_used = time.time()
        conn.use_count += 1

    async def _release_connection(self, conn: PooledConnection) -> None:
        """Release a connection back to the pool."""
        async with self._lock:
            conn.in_use = False
            # Idle time is measured from the moment of release.
            conn.last_used = time.time()
            logger.debug(f"Released connection (use count: {conn.use_count})")

    def _is_connection_valid(self, conn: PooledConnection) -> bool:
        """Check if a connection is still valid.

        A connection is invalid when it is older than
        ``max_connection_age``, has been unused for longer than
        ``max_idle_time``, or has had its handles cleared.
        """
        now = time.time()

        # Check age
        if now - conn.created_at > self.max_connection_age:
            return False

        # Check if idle too long
        if now - conn.last_used > self.max_idle_time:
            return False

        # Handles are set to None by _close_connection(); compare against
        # None explicitly rather than relying on the objects' truthiness.
        return conn.client is not None and conn.collection is not None

    async def _cleanup_loop(self) -> None:
        """Background task to cleanup expired connections.

        Runs until cancelled; errors from a single sweep are logged and the
        loop continues.
        """
        while True:
            try:
                await asyncio.sleep(self._CLEANUP_INTERVAL)  # Check every minute
                await self._cleanup_expired_connections()
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in connection cleanup: {e}")

    async def _evict_expired_connections(self) -> int:
        """Remove idle connections that are no longer valid.

        Must be called with ``self._lock`` held. Connections currently in
        use are never evicted.

        Returns:
            The number of connections removed.
        """
        kept: list[PooledConnection] = []
        expired: list[PooledConnection] = []
        for conn in self._pool:
            if conn.in_use or self._is_connection_valid(conn):
                kept.append(conn)
            else:
                expired.append(conn)

        if not expired:
            return 0

        # Partition by identity instead of list.remove(): PooledConnection
        # is a dataclass, so remove() would match by field equality.
        # Slice assignment keeps the same list object.
        self._pool[:] = kept
        for conn in expired:
            await self._close_connection(conn)
            self._stats["connections_expired"] += 1

        return len(expired)

    async def _cleanup_expired_connections(self) -> None:
        """Remove expired connections and restore the minimum pool size."""
        async with self._lock:
            removed = await self._evict_expired_connections()
            if removed:
                logger.debug(f"Cleaned up {removed} expired connections")

            # Ensure minimum connections
            while len(self._pool) < self.min_connections:
                try:
                    self._pool.append(await self._create_connection())
                except DatabaseError as e:
                    # Give up for this sweep; the next one will retry.
                    logger.error(f"Failed to create minimum connection: {e}")
                    break

    def get_stats(self) -> dict[str, Any]:
        """Get connection pool statistics.

        Returns:
            The cumulative counters plus the current pool size, the number
            of active and idle connections, and the configured limits.
        """
        active_connections = sum(1 for conn in self._pool if conn.in_use)
        idle_connections = len(self._pool) - active_connections

        return {
            **self._stats,
            "pool_size": len(self._pool),
            "active_connections": active_connections,
            "idle_connections": idle_connections,
            "max_connections": self.max_connections,
            "min_connections": self.min_connections,
        }

    async def health_check(self) -> bool:
        """Perform a health check on the connection pool.

        Returns:
            True if a connection could be acquired and ``count()`` on its
            collection succeeded; False if anything raised.
        """
        try:
            async with self.get_connection() as conn:
                # Try a simple operation
                conn.collection.count()
        except Exception as e:
            logger.error(f"Connection pool health check failed: {e}")
            return False
        return True