mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +3 -2
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/config.py +88 -40
- mcp_vector_search/cli/commands/index.py +198 -52
- mcp_vector_search/cli/commands/init.py +471 -58
- mcp_vector_search/cli/commands/install.py +284 -0
- mcp_vector_search/cli/commands/mcp.py +495 -0
- mcp_vector_search/cli/commands/search.py +241 -87
- mcp_vector_search/cli/commands/status.py +184 -58
- mcp_vector_search/cli/commands/watch.py +34 -35
- mcp_vector_search/cli/didyoumean.py +184 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +292 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +175 -27
- mcp_vector_search/cli/output.py +63 -45
- mcp_vector_search/config/defaults.py +50 -36
- mcp_vector_search/config/settings.py +49 -35
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/connection_pool.py +322 -0
- mcp_vector_search/core/database.py +335 -25
- mcp_vector_search/core/embeddings.py +73 -29
- mcp_vector_search/core/exceptions.py +19 -2
- mcp_vector_search/core/factory.py +310 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +237 -73
- mcp_vector_search/core/models.py +21 -19
- mcp_vector_search/core/project.py +73 -58
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +574 -86
- mcp_vector_search/core/watcher.py +48 -46
- mcp_vector_search/mcp/__init__.py +4 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +701 -0
- mcp_vector_search/parsers/base.py +30 -31
- mcp_vector_search/parsers/javascript.py +74 -48
- mcp_vector_search/parsers/python.py +57 -49
- mcp_vector_search/parsers/registry.py +47 -32
- mcp_vector_search/parsers/text.py +179 -0
- mcp_vector_search/utils/__init__.py +40 -0
- mcp_vector_search/utils/gitignore.py +229 -0
- mcp_vector_search/utils/timing.py +334 -0
- mcp_vector_search/utils/version.py +47 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/METADATA +173 -7
- mcp_vector_search-0.4.12.dist-info/RECORD +54 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import json
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Dict, List, Optional
|
|
7
6
|
|
|
8
7
|
import aiofiles
|
|
9
8
|
from loguru import logger
|
|
@@ -17,7 +16,7 @@ class EmbeddingCache:
|
|
|
17
16
|
|
|
18
17
|
def __init__(self, cache_dir: Path, max_size: int = 1000) -> None:
|
|
19
18
|
"""Initialize embedding cache.
|
|
20
|
-
|
|
19
|
+
|
|
21
20
|
Args:
|
|
22
21
|
cache_dir: Directory to store cached embeddings
|
|
23
22
|
max_size: Maximum number of embeddings to keep in memory
|
|
@@ -25,45 +24,51 @@ class EmbeddingCache:
|
|
|
25
24
|
self.cache_dir = cache_dir
|
|
26
25
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
27
26
|
self.max_size = max_size
|
|
28
|
-
self._memory_cache:
|
|
27
|
+
self._memory_cache: dict[str, list[float]] = {}
|
|
28
|
+
self._access_order: list[str] = [] # For LRU eviction
|
|
29
|
+
self._cache_hits = 0
|
|
30
|
+
self._cache_misses = 0
|
|
29
31
|
|
|
30
32
|
def _hash_content(self, content: str) -> str:
|
|
31
33
|
"""Generate cache key from content."""
|
|
32
34
|
return hashlib.sha256(content.encode()).hexdigest()[:16]
|
|
33
35
|
|
|
34
|
-
async def get_embedding(self, content: str) ->
|
|
36
|
+
async def get_embedding(self, content: str) -> list[float] | None:
|
|
35
37
|
"""Get cached embedding for content."""
|
|
36
38
|
cache_key = self._hash_content(content)
|
|
37
39
|
|
|
38
40
|
# Check memory cache first
|
|
39
41
|
if cache_key in self._memory_cache:
|
|
42
|
+
self._cache_hits += 1
|
|
43
|
+
# Move to end for LRU
|
|
44
|
+
self._access_order.remove(cache_key)
|
|
45
|
+
self._access_order.append(cache_key)
|
|
40
46
|
return self._memory_cache[cache_key]
|
|
41
47
|
|
|
42
48
|
# Check disk cache
|
|
43
49
|
cache_file = self.cache_dir / f"{cache_key}.json"
|
|
44
50
|
if cache_file.exists():
|
|
45
51
|
try:
|
|
46
|
-
async with aiofiles.open(cache_file
|
|
52
|
+
async with aiofiles.open(cache_file) as f:
|
|
47
53
|
content_str = await f.read()
|
|
48
54
|
embedding = json.loads(content_str)
|
|
49
55
|
|
|
50
|
-
# Add to memory cache
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
56
|
+
# Add to memory cache with LRU management
|
|
57
|
+
self._add_to_memory_cache(cache_key, embedding)
|
|
58
|
+
self._cache_hits += 1
|
|
54
59
|
return embedding
|
|
55
60
|
except Exception as e:
|
|
56
61
|
logger.warning(f"Failed to load cached embedding: {e}")
|
|
57
62
|
|
|
63
|
+
self._cache_misses += 1
|
|
58
64
|
return None
|
|
59
65
|
|
|
60
|
-
async def store_embedding(self, content: str, embedding:
|
|
66
|
+
async def store_embedding(self, content: str, embedding: list[float]) -> None:
|
|
61
67
|
"""Store embedding in cache."""
|
|
62
68
|
cache_key = self._hash_content(content)
|
|
63
69
|
|
|
64
|
-
# Store in memory cache
|
|
65
|
-
|
|
66
|
-
self._memory_cache[cache_key] = embedding
|
|
70
|
+
# Store in memory cache with LRU management
|
|
71
|
+
self._add_to_memory_cache(cache_key, embedding)
|
|
67
72
|
|
|
68
73
|
# Store in disk cache
|
|
69
74
|
cache_file = self.cache_dir / f"{cache_key}.json"
|
|
@@ -73,17 +78,56 @@ class EmbeddingCache:
|
|
|
73
78
|
except Exception as e:
|
|
74
79
|
logger.warning(f"Failed to cache embedding: {e}")
|
|
75
80
|
|
|
81
|
+
def _add_to_memory_cache(self, cache_key: str, embedding: list[float]) -> None:
|
|
82
|
+
"""Add embedding to memory cache with LRU eviction.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
cache_key: Cache key for the embedding
|
|
86
|
+
embedding: Embedding vector to cache
|
|
87
|
+
"""
|
|
88
|
+
# If already in cache, update and move to end
|
|
89
|
+
if cache_key in self._memory_cache:
|
|
90
|
+
self._access_order.remove(cache_key)
|
|
91
|
+
self._access_order.append(cache_key)
|
|
92
|
+
self._memory_cache[cache_key] = embedding
|
|
93
|
+
return
|
|
94
|
+
|
|
95
|
+
# If cache is full, evict least recently used
|
|
96
|
+
if len(self._memory_cache) >= self.max_size:
|
|
97
|
+
lru_key = self._access_order.pop(0)
|
|
98
|
+
del self._memory_cache[lru_key]
|
|
99
|
+
|
|
100
|
+
# Add new embedding
|
|
101
|
+
self._memory_cache[cache_key] = embedding
|
|
102
|
+
self._access_order.append(cache_key)
|
|
103
|
+
|
|
76
104
|
def clear_memory_cache(self) -> None:
    """Empty the in-memory cache and its recency list in place.

    Only ``self._memory_cache`` and ``self._access_order`` are touched; the
    hit/miss counters are left as they are.
    """
    for container in (self._memory_cache, self._access_order):
        container.clear()
|
|
108
|
+
|
|
109
|
+
def get_cache_stats(self) -> dict[str, any]:
|
|
110
|
+
"""Get cache performance statistics.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
Dictionary with cache statistics
|
|
114
|
+
"""
|
|
115
|
+
total_requests = self._cache_hits + self._cache_misses
|
|
116
|
+
hit_rate = self._cache_hits / total_requests if total_requests > 0 else 0.0
|
|
117
|
+
disk_files = (
|
|
118
|
+
len(list(self.cache_dir.glob("*.json"))) if self.cache_dir.exists() else 0
|
|
119
|
+
)
|
|
79
120
|
|
|
80
|
-
def get_cache_stats(self) -> Dict[str, int]:
|
|
81
|
-
"""Get cache statistics."""
|
|
82
|
-
disk_files = len(list(self.cache_dir.glob("*.json")))
|
|
83
121
|
return {
|
|
84
|
-
"
|
|
85
|
-
"
|
|
86
|
-
"
|
|
122
|
+
"memory_cache_size": len(self._memory_cache),
|
|
123
|
+
"memory_cached": len(self._memory_cache), # Alias for compatibility
|
|
124
|
+
"max_cache_size": self.max_size,
|
|
125
|
+
"memory_limit": self.max_size, # Alias for compatibility
|
|
126
|
+
"cache_hits": self._cache_hits,
|
|
127
|
+
"cache_misses": self._cache_misses,
|
|
128
|
+
"hit_rate": round(hit_rate, 3),
|
|
129
|
+
"disk_cache_files": disk_files,
|
|
130
|
+
"disk_cached": disk_files, # Alias for compatibility
|
|
87
131
|
}
|
|
88
132
|
|
|
89
133
|
|
|
@@ -104,7 +148,7 @@ class CodeBERTEmbeddingFunction:
|
|
|
104
148
|
logger.error(f"Failed to load embedding model {model_name}: {e}")
|
|
105
149
|
raise EmbeddingError(f"Failed to load embedding model: {e}") from e
|
|
106
150
|
|
|
107
|
-
def __call__(self, input:
|
|
151
|
+
def __call__(self, input: list[str]) -> list[list[float]]:
|
|
108
152
|
"""Generate embeddings for input texts (ChromaDB interface)."""
|
|
109
153
|
try:
|
|
110
154
|
embeddings = self.model.encode(input, convert_to_numpy=True)
|
|
@@ -120,11 +164,11 @@ class BatchEmbeddingProcessor:
|
|
|
120
164
|
def __init__(
|
|
121
165
|
self,
|
|
122
166
|
embedding_function: CodeBERTEmbeddingFunction,
|
|
123
|
-
cache:
|
|
167
|
+
cache: EmbeddingCache | None = None,
|
|
124
168
|
batch_size: int = 32,
|
|
125
169
|
) -> None:
|
|
126
170
|
"""Initialize batch embedding processor.
|
|
127
|
-
|
|
171
|
+
|
|
128
172
|
Args:
|
|
129
173
|
embedding_function: Function to generate embeddings
|
|
130
174
|
cache: Optional embedding cache
|
|
@@ -134,12 +178,12 @@ class BatchEmbeddingProcessor:
|
|
|
134
178
|
self.cache = cache
|
|
135
179
|
self.batch_size = batch_size
|
|
136
180
|
|
|
137
|
-
async def process_batch(self, contents:
|
|
181
|
+
async def process_batch(self, contents: list[str]) -> list[list[float]]:
|
|
138
182
|
"""Process a batch of content for embeddings.
|
|
139
|
-
|
|
183
|
+
|
|
140
184
|
Args:
|
|
141
185
|
contents: List of text content to embed
|
|
142
|
-
|
|
186
|
+
|
|
143
187
|
Returns:
|
|
144
188
|
List of embeddings
|
|
145
189
|
"""
|
|
@@ -179,7 +223,7 @@ class BatchEmbeddingProcessor:
|
|
|
179
223
|
|
|
180
224
|
# Cache new embeddings and fill placeholders
|
|
181
225
|
for i, (content, embedding) in enumerate(
|
|
182
|
-
zip(uncached_contents, new_embeddings)
|
|
226
|
+
zip(uncached_contents, new_embeddings, strict=False)
|
|
183
227
|
):
|
|
184
228
|
if self.cache:
|
|
185
229
|
await self.cache.store_embedding(content, embedding)
|
|
@@ -191,7 +235,7 @@ class BatchEmbeddingProcessor:
|
|
|
191
235
|
|
|
192
236
|
return embeddings
|
|
193
237
|
|
|
194
|
-
def get_stats(self) ->
|
|
238
|
+
def get_stats(self) -> dict[str, any]:
|
|
195
239
|
"""Get processor statistics."""
|
|
196
240
|
stats = {
|
|
197
241
|
"model_name": self.embedding_function.model_name,
|
|
@@ -207,7 +251,7 @@ class BatchEmbeddingProcessor:
|
|
|
207
251
|
|
|
208
252
|
def create_embedding_function(
|
|
209
253
|
model_name: str = "microsoft/codebert-base",
|
|
210
|
-
cache_dir:
|
|
254
|
+
cache_dir: Path | None = None,
|
|
211
255
|
cache_size: int = 1000,
|
|
212
256
|
):
|
|
213
257
|
"""Create embedding function and cache.
|
|
@@ -236,7 +280,7 @@ def create_embedding_function(
|
|
|
236
280
|
model_name=actual_model
|
|
237
281
|
)
|
|
238
282
|
|
|
239
|
-
logger.
|
|
283
|
+
logger.debug(f"Created ChromaDB embedding function with model: {actual_model}")
|
|
240
284
|
|
|
241
285
|
except Exception as e:
|
|
242
286
|
logger.warning(f"Failed to create ChromaDB embedding function: {e}")
|
|
@@ -1,66 +1,83 @@
|
|
|
1
1
|
"""Custom exception hierarchy for MCP Vector Search."""
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class MCPVectorSearchError(Exception):
|
|
7
7
|
"""Base exception for MCP Vector Search."""
|
|
8
8
|
|
|
9
|
-
def __init__(self, message: str, context:
|
|
9
|
+
def __init__(self, message: str, context: dict[str, Any] | None = None) -> None:
|
|
10
10
|
super().__init__(message)
|
|
11
11
|
self.context = context or {}
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class DatabaseError(MCPVectorSearchError):
|
|
15
15
|
"""Database-related errors."""
|
|
16
|
+
|
|
16
17
|
pass
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class DatabaseInitializationError(DatabaseError):
|
|
20
21
|
"""Database initialization failed."""
|
|
22
|
+
|
|
21
23
|
pass
|
|
22
24
|
|
|
23
25
|
|
|
24
26
|
class DatabaseNotInitializedError(DatabaseError):
|
|
25
27
|
"""Operation attempted on uninitialized database."""
|
|
28
|
+
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ConnectionPoolError(DatabaseError):
|
|
33
|
+
"""Connection pool operation failed."""
|
|
34
|
+
|
|
26
35
|
pass
|
|
27
36
|
|
|
28
37
|
|
|
29
38
|
class DocumentAdditionError(DatabaseError):
|
|
30
39
|
"""Failed to add documents to database."""
|
|
40
|
+
|
|
31
41
|
pass
|
|
32
42
|
|
|
33
43
|
|
|
34
44
|
class SearchError(DatabaseError):
|
|
35
45
|
"""Search operation failed."""
|
|
46
|
+
|
|
36
47
|
pass
|
|
37
48
|
|
|
38
49
|
|
|
39
50
|
class ParsingError(MCPVectorSearchError):
|
|
40
51
|
"""Code parsing errors."""
|
|
52
|
+
|
|
41
53
|
pass
|
|
42
54
|
|
|
43
55
|
|
|
44
56
|
class EmbeddingError(MCPVectorSearchError):
|
|
45
57
|
"""Embedding generation errors."""
|
|
58
|
+
|
|
46
59
|
pass
|
|
47
60
|
|
|
48
61
|
|
|
49
62
|
class ConfigurationError(MCPVectorSearchError):
|
|
50
63
|
"""Configuration validation errors."""
|
|
64
|
+
|
|
51
65
|
pass
|
|
52
66
|
|
|
53
67
|
|
|
54
68
|
class ProjectError(MCPVectorSearchError):
|
|
55
69
|
"""Project management errors."""
|
|
70
|
+
|
|
56
71
|
pass
|
|
57
72
|
|
|
58
73
|
|
|
59
74
|
class ProjectNotFoundError(ProjectError):
|
|
60
75
|
"""Project directory or configuration not found."""
|
|
76
|
+
|
|
61
77
|
pass
|
|
62
78
|
|
|
63
79
|
|
|
64
80
|
class ProjectInitializationError(ProjectError):
|
|
65
81
|
"""Failed to initialize project."""
|
|
82
|
+
|
|
66
83
|
pass
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""Component factory for creating commonly used objects."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, TypeVar
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from loguru import logger
|
|
11
|
+
|
|
12
|
+
from ..cli.output import print_error
|
|
13
|
+
from ..config.settings import ProjectConfig
|
|
14
|
+
from .auto_indexer import AutoIndexer
|
|
15
|
+
from .database import ChromaVectorDatabase, PooledChromaVectorDatabase, VectorDatabase
|
|
16
|
+
from .embeddings import CodeBERTEmbeddingFunction, create_embedding_function
|
|
17
|
+
from .indexer import SemanticIndexer
|
|
18
|
+
from .project import ProjectManager
|
|
19
|
+
from .search import SemanticSearchEngine
|
|
20
|
+
|
|
21
|
+
# Type variable bound to any callable; used in this module to annotate
# decorators that accept a function and return one of the same type.
F = TypeVar("F", bound=Callable[..., Any])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class ComponentBundle:
    """Bundle of commonly used components.

    This is the return type of
    ``ComponentFactory.create_standard_components``, which leaves the two
    optional members as ``None`` unless asked to build them.
    """

    # Always-present components.
    project_manager: ProjectManager
    config: ProjectConfig
    database: VectorDatabase
    indexer: SemanticIndexer
    embedding_function: CodeBERTEmbeddingFunction
    # Optional components; default to None when not requested.
    search_engine: SemanticSearchEngine | None = None
    auto_indexer: AutoIndexer | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ComponentFactory:
    """Static constructors for the components that CLI commands share."""

    @staticmethod
    def create_project_manager(project_root: Path) -> ProjectManager:
        """Build a ``ProjectManager`` for ``project_root``."""
        manager = ProjectManager(project_root)
        return manager

    @staticmethod
    def load_config(project_root: Path) -> tuple[ProjectManager, ProjectConfig]:
        """Create a project manager and load its configuration.

        Returns:
            ``(project_manager, config)`` for ``project_root``.
        """
        manager = ComponentFactory.create_project_manager(project_root)
        return manager, manager.load_config()

    @staticmethod
    def create_embedding_function(
        model_name: str,
    ) -> tuple[CodeBERTEmbeddingFunction, Any]:
        """Delegate to the module-level ``create_embedding_function``.

        Returns:
            Whatever that function returns for ``model_name``; callers in
            this class unpack it as a two-item tuple and use the first item.
        """
        created = create_embedding_function(model_name)
        return created

    @staticmethod
    def create_database(
        config: ProjectConfig,
        embedding_function: CodeBERTEmbeddingFunction,
        use_pooling: bool = False,
        **pool_kwargs,
    ) -> VectorDatabase:
        """Create the vector database for a project.

        Args:
            config: Project configuration; ``config.index_path`` is used as
                the persist directory.
            embedding_function: Embedding function handed to the database.
            use_pooling: When true, build a ``PooledChromaVectorDatabase``
                and forward ``pool_kwargs`` to it.
            **pool_kwargs: Extra keyword arguments for the pooled database;
                unused when ``use_pooling`` is false.
        """
        shared_options = {
            "persist_directory": config.index_path,
            "embedding_function": embedding_function,
            "collection_name": "code_search",
        }
        if not use_pooling:
            return ChromaVectorDatabase(**shared_options)
        return PooledChromaVectorDatabase(**shared_options, **pool_kwargs)

    @staticmethod
    def create_indexer(
        database: VectorDatabase, project_root: Path, config: ProjectConfig
    ) -> SemanticIndexer:
        """Create a ``SemanticIndexer`` using ``config.file_extensions``."""
        extensions = config.file_extensions
        return SemanticIndexer(
            database=database,
            project_root=project_root,
            file_extensions=extensions,
        )

    @staticmethod
    def create_search_engine(
        database: VectorDatabase,
        project_root: Path,
        similarity_threshold: float = 0.7,
        auto_indexer: AutoIndexer | None = None,
        enable_auto_reindex: bool = True,
    ) -> SemanticSearchEngine:
        """Create a ``SemanticSearchEngine``; every argument is passed through by keyword."""
        engine_options = {
            "database": database,
            "project_root": project_root,
            "similarity_threshold": similarity_threshold,
            "auto_indexer": auto_indexer,
            "enable_auto_reindex": enable_auto_reindex,
        }
        return SemanticSearchEngine(**engine_options)

    @staticmethod
    def create_auto_indexer(
        indexer: SemanticIndexer,
        database: VectorDatabase,
        auto_reindex_threshold: int = 5,
        staleness_threshold: float = 300.0,
    ) -> AutoIndexer:
        """Create an ``AutoIndexer``; every argument is passed through by keyword."""
        indexer_options = {
            "indexer": indexer,
            "database": database,
            "auto_reindex_threshold": auto_reindex_threshold,
            "staleness_threshold": staleness_threshold,
        }
        return AutoIndexer(**indexer_options)

    @staticmethod
    async def create_standard_components(
        project_root: Path,
        use_pooling: bool = False,
        include_search_engine: bool = False,
        include_auto_indexer: bool = False,
        similarity_threshold: float = 0.7,
        auto_reindex_threshold: int = 5,
        **pool_kwargs,
    ) -> ComponentBundle:
        """Assemble the standard component set for CLI commands.

        Args:
            project_root: Project root directory.
            use_pooling: Whether to use connection pooling.
            include_search_engine: Whether to create a search engine.
            include_auto_indexer: Whether to create an auto-indexer.
            similarity_threshold: Default similarity threshold for search.
            auto_reindex_threshold: Max files to auto-reindex.
            **pool_kwargs: Additional arguments for the connection pool.

        Returns:
            ComponentBundle with the requested components; members that
            were not requested are ``None``.
        """
        factory = ComponentFactory

        project_manager, config = factory.load_config(project_root)

        # Only the first element of the returned pair is needed here.
        embedding_function, _ = factory.create_embedding_function(
            config.embedding_model
        )

        database = factory.create_database(
            config=config,
            embedding_function=embedding_function,
            use_pooling=use_pooling,
            **pool_kwargs,
        )

        indexer = factory.create_indexer(
            database=database,
            project_root=project_root,
            config=config,
        )

        # The auto-indexer is built first because the search engine may
        # receive it.
        auto_indexer = (
            factory.create_auto_indexer(
                indexer=indexer,
                database=database,
                auto_reindex_threshold=auto_reindex_threshold,
            )
            if include_auto_indexer
            else None
        )

        search_engine = (
            factory.create_search_engine(
                database=database,
                project_root=project_root,
                similarity_threshold=similarity_threshold,
                auto_indexer=auto_indexer,
                enable_auto_reindex=include_auto_indexer,
            )
            if include_search_engine
            else None
        )

        return ComponentBundle(
            project_manager=project_manager,
            config=config,
            database=database,
            indexer=indexer,
            embedding_function=embedding_function,
            search_engine=search_engine,
            auto_indexer=auto_indexer,
        )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class DatabaseContext:
    """Async context manager tying a database's lifetime to a ``with`` block.

    Entering awaits ``database.initialize()`` and yields the database;
    leaving awaits ``database.close()``.
    """

    def __init__(self, database: VectorDatabase):
        """Remember the database to manage.

        Args:
            database: Vector database instance.
        """
        self.database = database

    async def __aenter__(self) -> VectorDatabase:
        """Initialize the database and hand it to the ``async with`` body."""
        await self.database.initialize()
        return self.database

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the database; returns ``None`` so exceptions from the body propagate."""
        managed = self.database
        await managed.close()
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def handle_cli_errors(operation_name: str) -> Callable[[F], F]:
    """Decorator for consistent CLI error handling.

    Any exception raised by the wrapped function is logged, shown with
    ``print_error`` and converted into ``typer.Exit(1)``. ``typer.Exit`` and
    ``typer.Abort`` raised by the function itself are re-raised unchanged so
    that a deliberate exit (including exit code 0) is not reported as a
    failure.

    Args:
        operation_name: Name of the operation for error messages.

    Returns:
        Decorator function. It yields an ``async`` wrapper for coroutine
        functions and a plain wrapper for everything else.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import inspect

    def decorator(func: F) -> F:
        # ``await`` is a keyword and never appears in ``co_names``, so the
        # function kind has to be detected with inspect instead.
        if inspect.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                try:
                    return await func(*args, **kwargs)
                except (typer.Exit, typer.Abort):
                    # Control-flow exceptions, not failures.
                    raise
                except Exception as e:
                    logger.error(f"{operation_name} failed: {e}")
                    print_error(f"{operation_name} failed: {e}")
                    raise typer.Exit(1) from e

            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except (typer.Exit, typer.Abort):
                # Control-flow exceptions, not failures.
                raise
            except Exception as e:
                logger.error(f"{operation_name} failed: {e}")
                print_error(f"{operation_name} failed: {e}")
                raise typer.Exit(1) from e

        return sync_wrapper

    return decorator
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class ConfigurationService:
    """Per-project access point for the project manager and configuration.

    Both objects are created on first access and then reused.
    """

    def __init__(self, project_root: Path):
        """Store the project root; nothing is loaded yet.

        Args:
            project_root: Project root directory.
        """
        self.project_root = project_root
        self._project_manager: ProjectManager | None = None
        self._config: ProjectConfig | None = None

    @property
    def project_manager(self) -> ProjectManager:
        """Project manager for ``project_root``, created on first access."""
        if self._project_manager is not None:
            return self._project_manager
        self._project_manager = ProjectManager(self.project_root)
        return self._project_manager

    @property
    def config(self) -> ProjectConfig:
        """Project configuration, loaded via the project manager on first access."""
        if self._config is not None:
            return self._config
        self._config = self.project_manager.load_config()
        return self._config

    def ensure_initialized(self) -> bool:
        """Check that the project has been initialized.

        Prints an error message when it has not.

        Returns:
            True if the project is initialized, False otherwise.
        """
        if self.project_manager.is_initialized():
            return True
        print_error("Project not initialized. Run 'mcp-vector-search init' first.")
        return False

    def reload_config(self) -> None:
        """Drop the cached configuration so the next ``config`` access loads it again."""
        self._config = None

    def save_config(self, config: ProjectConfig) -> None:
        """Persist ``config`` through the project manager and cache it.

        Args:
            config: Configuration to save.
        """
        manager = self.project_manager
        manager.save_config(config)
        self._config = config
|