mcp_vector_search-0.15.7-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.

Potentially problematic release: this version of mcp-vector-search might be problematic.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +299 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,146 @@
+ """Pydantic configuration schemas for MCP Vector Search."""
+
+ from pathlib import Path
+
+ from pydantic import Field, field_validator
+ from pydantic_settings import BaseSettings
+
+
+ class ProjectConfig(BaseSettings):
+     """Type-safe project configuration with validation."""
+
+     project_root: Path = Field(..., description="Project root directory")
+     index_path: Path = Field(
+         default=".mcp-vector-search", description="Index storage path"
+     )
+     file_extensions: list[str] = Field(
+         default=[".py", ".js", ".ts", ".jsx", ".tsx"],
+         description="File extensions to index",
+     )
+     embedding_model: str = Field(
+         default="sentence-transformers/all-MiniLM-L6-v2",
+         description="Embedding model name",
+     )
+     similarity_threshold: float = Field(
+         default=0.3, ge=0.0, le=1.0, description="Similarity threshold"
+     )
+     max_chunk_size: int = Field(
+         default=512, gt=0, description="Maximum chunk size in tokens"
+     )
+     languages: list[str] = Field(
+         default=[], description="Detected programming languages"
+     )
+     watch_files: bool = Field(
+         default=False, description="Enable file watching for incremental updates"
+     )
+     cache_embeddings: bool = Field(default=True, description="Enable embedding caching")
+     max_cache_size: int = Field(
+         default=1000, gt=0, description="Maximum number of cached embeddings"
+     )
+     auto_reindex_on_upgrade: bool = Field(
+         default=True,
+         description="Automatically reindex when tool version is upgraded (minor/major versions)",
+     )
+     skip_dotfiles: bool = Field(
+         default=True,
+         description="Skip files and directories starting with '.' (except whitelisted ones)",
+     )
+     respect_gitignore: bool = Field(
+         default=True,
+         description="Respect .gitignore patterns when indexing files",
+     )
+     openrouter_api_key: str | None = Field(
+         default=None,
+         description="OpenRouter API key for chat command (optional, can also use env var)",
+     )
+     openai_api_key: str | None = Field(
+         default=None,
+         description="OpenAI API key for chat command (optional, can also use env var)",
+     )
+     preferred_llm_provider: str | None = Field(
+         default=None,
+         description="Preferred LLM provider: 'openai' or 'openrouter' (auto-detect if not set)",
+     )
+
+     @field_validator("project_root", "index_path", mode="before")
+     @classmethod
+     def validate_paths(cls, v: Path) -> Path:
+         """Ensure paths are absolute and normalized."""
+         if isinstance(v, str):
+             v = Path(v)
+         return v.resolve() if isinstance(v, Path) else v
+
+     @field_validator("file_extensions", mode="before")
+     @classmethod
+     def validate_extensions(cls, v: list[str]) -> list[str]:
+         """Ensure extensions start with dot."""
+         if isinstance(v, list):
+             return [ext if ext.startswith(".") else f".{ext}" for ext in v]
+         return v
+
+     model_config = {
+         "env_prefix": "MCP_VECTOR_SEARCH_",
+         "case_sensitive": False,
+     }
+
+
+ class DatabaseConfig(BaseSettings):
+     """Database configuration settings."""
+
+     persist_directory: Path | None = Field(
+         default=None, description="ChromaDB persistence directory"
+     )
+     collection_name: str = Field(
+         default="code_search", description="ChromaDB collection name"
+     )
+     batch_size: int = Field(
+         default=32, gt=0, description="Batch size for embedding operations"
+     )
+     enable_telemetry: bool = Field(
+         default=False, description="Enable ChromaDB telemetry"
+     )
+
+     @field_validator("persist_directory", mode="before")
+     @classmethod
+     def validate_persist_directory(cls, v: Path | None) -> Path | None:
+         """Ensure persist directory is absolute if provided."""
+         if v and isinstance(v, str):
+             v = Path(v)
+         return v.resolve() if isinstance(v, Path) else None
+
+     model_config = {
+         "env_prefix": "MCP_VECTOR_SEARCH_DB_",
+         "case_sensitive": False,
+     }
+
+
+ class SearchConfig(BaseSettings):
+     """Search configuration settings."""
+
+     default_limit: int = Field(
+         default=10, gt=0, description="Default number of search results"
+     )
+     max_limit: int = Field(
+         default=100, gt=0, description="Maximum number of search results"
+     )
+     enable_reranking: bool = Field(default=True, description="Enable result reranking")
+     context_lines: int = Field(
+         default=3, ge=0, description="Number of context lines to include"
+     )
+
+     @field_validator("max_limit", mode="after")
+     @classmethod
+     def validate_max_limit(cls, v: int, info) -> int:
+ """Ensure max_limit is greater than default_limit."""
+         if info.data and "default_limit" in info.data:
+             default_limit = info.data["default_limit"]
+             if v < default_limit:
+                 raise ValueError(
+                     "max_limit must be greater than or equal to default_limit"
+                 )
+         return v
+
+     model_config = {
+         "env_prefix": "MCP_VECTOR_SEARCH_SEARCH_",
+         "case_sensitive": False,
+     }
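
The three BaseSettings classes above read their values from prefixed, case-insensitive environment variables (MCP_VECTOR_SEARCH_, MCP_VECTOR_SEARCH_DB_, and MCP_VECTOR_SEARCH_SEARCH_). A minimal usage sketch, assuming the module is importable as mcp_vector_search.config.settings per the file list above (this snippet is editorial and not part of the wheel):

import os

from mcp_vector_search.config.settings import ProjectConfig, SearchConfig

# Field names map to prefixed environment variables, matched case-insensitively.
os.environ["MCP_VECTOR_SEARCH_SIMILARITY_THRESHOLD"] = "0.5"
os.environ["MCP_VECTOR_SEARCH_SEARCH_DEFAULT_LIMIT"] = "25"

config = ProjectConfig(project_root=".")  # validate_paths resolves this to an absolute path
search = SearchConfig()  # default max_limit (100) still satisfies validate_max_limit

assert config.similarity_threshold == 0.5
assert search.default_limit == 25
assert config.project_root.is_absolute()
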
@@ -0,0 +1 @@
+ """Core functionality for MCP Vector Search."""
@@ -0,0 +1,299 @@
+ """Automatic indexing strategies without daemon processes."""
+
+ import asyncio
+ import os
+ import time
+ from pathlib import Path
+ from typing import Any
+
+ from loguru import logger
+
+ from .database import VectorDatabase
+ from .indexer import SemanticIndexer
+
+
+ class AutoIndexer:
+     """Handles automatic reindexing without daemon processes."""
+
+     def __init__(
+         self,
+         indexer: SemanticIndexer,
+         database: VectorDatabase,
+         auto_reindex_threshold: int = 5,  # Max files to auto-reindex
+         staleness_threshold: float = 300.0,  # 5 minutes
+     ):
+         """Initialize auto-indexer.
+
+         Args:
+             indexer: Semantic indexer instance
+             database: Vector database instance
+             auto_reindex_threshold: Max files to auto-reindex without asking
+             staleness_threshold: Time in seconds before considering index stale
+         """
+         self.indexer = indexer
+         self.database = database
+         self.auto_reindex_threshold = auto_reindex_threshold
+         self.staleness_threshold = staleness_threshold
+         self._last_check_time = 0.0
+         self._check_interval = 30.0  # Check at most every 30 seconds
+
+     async def check_and_reindex_if_needed(
+         self, force_check: bool = False, interactive: bool = True
+     ) -> tuple[bool, int]:
+         """Check if reindexing is needed and optionally perform it.
+
+         Args:
+             force_check: Skip time-based check throttling
+             interactive: Whether to prompt user for large reindexes
+
+         Returns:
+             Tuple of (reindexed, files_updated)
+         """
+         current_time = time.time()
+
+         # Throttle checks to avoid excessive filesystem scanning
+         if (
+             not force_check
+             and (current_time - self._last_check_time) < self._check_interval
+         ):
+             return False, 0
+
+         self._last_check_time = current_time
+
+         try:
+             # Get files that need reindexing
+             stale_files = await self._find_stale_files()
+
+             if not stale_files:
+                 logger.debug("No files need reindexing")
+                 return False, 0
+
+             logger.info(f"Found {len(stale_files)} files that need reindexing")
+
+             # Decide whether to auto-reindex
+             should_reindex = await self._should_auto_reindex(stale_files, interactive)
+
+             if should_reindex:
+                 updated_count = await self._reindex_files(stale_files)
+                 logger.info(f"Auto-reindexed {updated_count} files")
+                 return True, updated_count
+             else:
+                 logger.info("Skipping auto-reindex (user choice or too many files)")
+                 return False, len(stale_files)
+
+         except Exception as e:
+             logger.error(f"Auto-reindex check failed: {e}")
+             return False, 0
+
+     async def _find_stale_files(self) -> list[Path]:
+         """Find files that need reindexing."""
+         try:
+             # Load existing metadata
+             metadata = self.indexer._load_index_metadata()
+
+             # Find all indexable files
+             all_files = self.indexer._find_indexable_files()
+
+             stale_files = []
+             for file_path in all_files:
+                 if self.indexer._needs_reindexing(file_path, metadata):
+                     stale_files.append(file_path)
+
+             return stale_files
+
+         except Exception as e:
+             logger.error(f"Failed to find stale files: {e}")
+             return []
+
+     async def _should_auto_reindex(
+         self, stale_files: list[Path], interactive: bool
+     ) -> bool:
+         """Determine if we should automatically reindex."""
+         file_count = len(stale_files)
+
+         # Always auto-reindex small numbers of files
+         if file_count <= self.auto_reindex_threshold:
+             logger.debug(f"Auto-reindexing {file_count} files (under threshold)")
+             return True
+
+         # For larger numbers, check if interactive mode is enabled
+         if not interactive:
+             logger.debug(
+                 f"Skipping auto-reindex of {file_count} files (non-interactive)"
+             )
+             return False
+
+         # In interactive mode, we could prompt the user
+         # For now, we'll be conservative and skip large reindexes
+         logger.info(f"Skipping auto-reindex of {file_count} files (over threshold)")
+         logger.info("Run 'mcp-vector-search index' to update manually")
+         return False
+
+     async def _reindex_files(self, files: list[Path]) -> int:
+         """Reindex the specified files."""
+         updated_count = 0
+
+         try:
+             # Process files in small batches to avoid overwhelming the system
+             batch_size = min(self.auto_reindex_threshold, 10)
+
+             for i in range(0, len(files), batch_size):
+                 batch = files[i : i + batch_size]
+
+                 # Process batch
+                 results = await self.indexer._process_file_batch(
+                     batch, force_reindex=False
+                 )
+
+                 # Count successful updates
+                 updated_count += sum(1 for success in results if success)
+
+                 # Small delay between batches to be nice to the system
+                 if i + batch_size < len(files):
+                     await asyncio.sleep(0.1)
+
+             return updated_count
+
+         except Exception as e:
+             logger.error(f"Failed to reindex files: {e}")
+             return updated_count
+
+     def get_staleness_info(self) -> dict[str, Any]:
+         """Get information about index staleness."""
+         try:
+             metadata = self.indexer._load_index_metadata()
+             all_files = self.indexer._find_indexable_files()
+
+             stale_count = 0
+             newest_file_time = 0.0
+             oldest_index_time = float("inf")
+
+             for file_path in all_files:
+                 file_mtime = os.path.getmtime(file_path)
+                 newest_file_time = max(newest_file_time, file_mtime)
+
+                 stored_mtime = metadata.get(str(file_path), 0)
+                 if stored_mtime > 0:
+                     oldest_index_time = min(oldest_index_time, stored_mtime)
+
+                 if self.indexer._needs_reindexing(file_path, metadata):
+                     stale_count += 1
+
+             current_time = time.time()
+             staleness_seconds = (
+                 current_time - oldest_index_time
+                 if oldest_index_time != float("inf")
+                 else 0
+             )
+
+             return {
+                 "total_files": len(all_files),
+                 "indexed_files": len(metadata),
+                 "stale_files": stale_count,
+                 "staleness_seconds": staleness_seconds,
+                 "is_stale": staleness_seconds > self.staleness_threshold,
+                 "newest_file_time": newest_file_time,
+                 "oldest_index_time": (
+                     oldest_index_time if oldest_index_time != float("inf") else 0
+                 ),
+             }
+
+         except Exception as e:
+             logger.error(f"Failed to get staleness info: {e}")
+             return {
+                 "total_files": 0,
+                 "indexed_files": 0,
+                 "stale_files": 0,
+                 "staleness_seconds": 0,
+                 "is_stale": False,
+                 "newest_file_time": 0,
+                 "oldest_index_time": 0,
+             }
+
+
+ class SearchTriggeredIndexer:
+     """Automatically reindex when searches are performed."""
+
+     def __init__(self, auto_indexer: AutoIndexer):
+         self.auto_indexer = auto_indexer
+         self._search_count = 0
+         self._searches_since_check = 0
+         self._check_every_n_searches = 10  # Check every 10 searches
+
+     async def pre_search_hook(self) -> bool:
+         """Hook to run before search operations.
+
+         Returns:
+             True if reindexing occurred, False otherwise
+         """
+         self._search_count += 1
+         self._searches_since_check += 1
+
+         # Only check periodically to avoid slowing down searches
+         if self._searches_since_check >= self._check_every_n_searches:
+             self._searches_since_check = 0
+
+             logger.debug("Checking for stale files before search")
+             reindexed, file_count = await self.auto_indexer.check_and_reindex_if_needed(
+                 force_check=False,
+                 interactive=False,  # Non-interactive during search
+             )
+
+             if reindexed:
+                 logger.info(f"Auto-reindexed {file_count} files before search")
+
+             return reindexed
+
+         return False
+
+     def get_search_stats(self) -> dict[str, int]:
+         """Get search-related statistics."""
+         return {
+             "total_searches": self._search_count,
+             "searches_since_check": self._searches_since_check,
+             "check_interval": self._check_every_n_searches,
+         }
+
+
+ class PeriodicIndexChecker:
+     """Check for stale index periodically during operations."""
+
+     def __init__(self, auto_indexer: AutoIndexer, check_interval: float = 3600.0):
+         """Initialize periodic checker.
+
+         Args:
+             auto_indexer: AutoIndexer instance
+             check_interval: Check interval in seconds (default: 1 hour)
+         """
+         self.auto_indexer = auto_indexer
+         self.check_interval = check_interval
+         self._last_periodic_check = 0.0
+
+     async def maybe_check_and_reindex(self) -> bool:
+         """Check if it's time for a periodic reindex check.
+
+         Returns:
+             True if reindexing occurred, False otherwise
+         """
+         current_time = time.time()
+
+         if (current_time - self._last_periodic_check) >= self.check_interval:
+             self._last_periodic_check = current_time
+
+             logger.debug("Performing periodic index staleness check")
+             reindexed, file_count = await self.auto_indexer.check_and_reindex_if_needed(
+                 force_check=True, interactive=False
+             )
+
+             if reindexed:
+                 logger.info(f"Periodic auto-reindex updated {file_count} files")
+
+             return reindexed
+
+         return False
+
+     def time_until_next_check(self) -> float:
+         """Get time in seconds until next periodic check."""
+         current_time = time.time()
+         elapsed = current_time - self._last_periodic_check
+         return max(0, self.check_interval - elapsed)
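
Taken together, these classes implement the daemon-free strategy named in the module docstring: AutoIndexer does the throttled scan-and-reindex work, while SearchTriggeredIndexer and PeriodicIndexChecker decide when to invoke it from the search path and from long-running operations, respectively. A sketch of how they compose, assuming indexer and database are already-constructed SemanticIndexer and VectorDatabase instances (their constructors are outside this hunk, so this snippet is illustrative only):

import asyncio

from mcp_vector_search.core.auto_indexer import (
    AutoIndexer,
    PeriodicIndexChecker,
    SearchTriggeredIndexer,
)

async def demo(indexer, database) -> None:
    auto = AutoIndexer(indexer, database, auto_reindex_threshold=5)

    # Search path: every 10th search runs a throttled, non-interactive
    # staleness check, so large backlogs are skipped rather than prompted for.
    hook = SearchTriggeredIndexer(auto)
    for _ in range(10):
        await hook.pre_search_hook()
    print(hook.get_search_stats())

    # Background path: at most one forced check per hour.
    periodic = PeriodicIndexChecker(auto, check_interval=3600.0)
    await periodic.maybe_check_and_reindex()
    print(f"Next periodic check in {periodic.time_until_next_check():.0f}s")

# asyncio.run(demo(indexer, database))  # with real instances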