mcp-vector-search 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +2 -2
- mcp_vector_search/cli/commands/init.py +28 -8
- mcp_vector_search/cli/commands/install.py +9 -9
- mcp_vector_search/cli/commands/reset.py +1 -1
- mcp_vector_search/cli/commands/search.py +42 -0
- mcp_vector_search/cli/commands/status.py +33 -0
- mcp_vector_search/cli/didyoumean.py +1 -2
- mcp_vector_search/cli/history.py +6 -3
- mcp_vector_search/cli/main.py +12 -7
- mcp_vector_search/cli/suggestions.py +2 -1
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +4 -0
- mcp_vector_search/config/settings.py +4 -0
- mcp_vector_search/core/database.py +1 -1
- mcp_vector_search/core/factory.py +11 -3
- mcp_vector_search/core/indexer.py +80 -5
- mcp_vector_search/core/search.py +195 -88
- mcp_vector_search/parsers/__init__.py +2 -1
- mcp_vector_search/parsers/base.py +5 -7
- mcp_vector_search/parsers/dart.py +2 -2
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/registry.py +5 -0
- mcp_vector_search/parsers/text.py +4 -3
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/utils/gitignore.py +1 -1
- {mcp_vector_search-0.5.0.dist-info → mcp_vector_search-0.6.0.dist-info}/METADATA +26 -10
- {mcp_vector_search-0.5.0.dist-info → mcp_vector_search-0.6.0.dist-info}/RECORD +30 -27
- {mcp_vector_search-0.5.0.dist-info → mcp_vector_search-0.6.0.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.5.0.dist-info → mcp_vector_search-0.6.0.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.5.0.dist-info → mcp_vector_search-0.6.0.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/__init__.py
CHANGED
|
@@ -5,6 +5,11 @@ from pathlib import Path
|
|
|
5
5
|
import typer
|
|
6
6
|
from loguru import logger
|
|
7
7
|
|
|
8
|
+
from ...config.constants import (
|
|
9
|
+
SUBPROCESS_INSTALL_TIMEOUT,
|
|
10
|
+
SUBPROCESS_MCP_TIMEOUT,
|
|
11
|
+
SUBPROCESS_TEST_TIMEOUT,
|
|
12
|
+
)
|
|
8
13
|
from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS
|
|
9
14
|
from ...core.exceptions import ProjectInitializationError
|
|
10
15
|
from ...core.project import ProjectManager
|
|
@@ -150,11 +155,15 @@ def main(
|
|
|
150
155
|
install_cmd = ["pip", "install", "-e", str(dev_source_path)]
|
|
151
156
|
try:
|
|
152
157
|
result = subprocess.run(
|
|
153
|
-
install_cmd,
|
|
158
|
+
install_cmd,
|
|
159
|
+
capture_output=True,
|
|
160
|
+
text=True,
|
|
161
|
+
timeout=SUBPROCESS_INSTALL_TIMEOUT,
|
|
154
162
|
)
|
|
155
163
|
if result.returncode == 0:
|
|
156
164
|
install_success = True
|
|
157
|
-
except:
|
|
165
|
+
except (subprocess.TimeoutExpired, OSError, ValueError) as e:
|
|
166
|
+
logger.debug(f"pip install method failed: {e}")
|
|
158
167
|
pass
|
|
159
168
|
|
|
160
169
|
# Method 2: Try python -m pip
|
|
@@ -169,11 +178,15 @@ def main(
|
|
|
169
178
|
]
|
|
170
179
|
try:
|
|
171
180
|
result = subprocess.run(
|
|
172
|
-
install_cmd,
|
|
181
|
+
install_cmd,
|
|
182
|
+
capture_output=True,
|
|
183
|
+
text=True,
|
|
184
|
+
timeout=SUBPROCESS_INSTALL_TIMEOUT,
|
|
173
185
|
)
|
|
174
186
|
if result.returncode == 0:
|
|
175
187
|
install_success = True
|
|
176
|
-
except:
|
|
188
|
+
except (subprocess.TimeoutExpired, OSError, ValueError) as e:
|
|
189
|
+
logger.debug(f"python -m pip install method failed: {e}")
|
|
177
190
|
pass
|
|
178
191
|
|
|
179
192
|
# Method 3: Try uv if available
|
|
@@ -181,11 +194,15 @@ def main(
|
|
|
181
194
|
install_cmd = ["uv", "add", "--editable", str(dev_source_path)]
|
|
182
195
|
try:
|
|
183
196
|
result = subprocess.run(
|
|
184
|
-
install_cmd,
|
|
197
|
+
install_cmd,
|
|
198
|
+
capture_output=True,
|
|
199
|
+
text=True,
|
|
200
|
+
timeout=SUBPROCESS_INSTALL_TIMEOUT,
|
|
185
201
|
)
|
|
186
202
|
if result.returncode == 0:
|
|
187
203
|
install_success = True
|
|
188
|
-
except:
|
|
204
|
+
except (subprocess.TimeoutExpired, OSError, ValueError) as e:
|
|
205
|
+
logger.debug(f"uv add method failed: {e}")
|
|
189
206
|
pass
|
|
190
207
|
|
|
191
208
|
if install_success:
|
|
@@ -502,7 +519,10 @@ async def run_init_setup(
|
|
|
502
519
|
] + server_command.split()
|
|
503
520
|
|
|
504
521
|
result = subprocess.run(
|
|
505
|
-
cmd_args,
|
|
522
|
+
cmd_args,
|
|
523
|
+
capture_output=True,
|
|
524
|
+
text=True,
|
|
525
|
+
timeout=SUBPROCESS_MCP_TIMEOUT,
|
|
506
526
|
)
|
|
507
527
|
|
|
508
528
|
if result.returncode == 0:
|
|
@@ -663,7 +683,7 @@ def _test_mcp_server(project_root: Path) -> None:
|
|
|
663
683
|
|
|
664
684
|
try:
|
|
665
685
|
stdout, stderr = test_process.communicate(
|
|
666
|
-
input=json.dumps(init_request) + "\n", timeout=
|
|
686
|
+
input=json.dumps(init_request) + "\n", timeout=SUBPROCESS_TEST_TIMEOUT
|
|
667
687
|
)
|
|
668
688
|
|
|
669
689
|
if test_process.returncode == 0:
|
|
@@ -40,22 +40,22 @@ def detect_ai_tools() -> dict[str, Path]:
|
|
|
40
40
|
Dictionary mapping tool names to their config file paths.
|
|
41
41
|
For Claude Code, returns a placeholder path since it uses project-scoped .mcp.json
|
|
42
42
|
"""
|
|
43
|
-
|
|
43
|
+
home = Path.home()
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
"claude-desktop":
|
|
45
|
+
config_locations = {
|
|
46
|
+
"claude-desktop": home
|
|
47
47
|
/ "Library"
|
|
48
48
|
/ "Application Support"
|
|
49
49
|
/ "Claude"
|
|
50
50
|
/ "claude_desktop_config.json",
|
|
51
|
-
"cursor":
|
|
52
|
-
"windsurf":
|
|
53
|
-
"vscode":
|
|
51
|
+
"cursor": home / ".cursor" / "mcp.json",
|
|
52
|
+
"windsurf": home / ".codeium" / "windsurf" / "mcp_config.json",
|
|
53
|
+
"vscode": home / ".vscode" / "mcp.json",
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
# Return only tools with existing config files
|
|
57
57
|
detected_tools = {}
|
|
58
|
-
for tool_name, config_path in
|
|
58
|
+
for tool_name, config_path in config_locations.items():
|
|
59
59
|
if config_path.exists():
|
|
60
60
|
detected_tools[tool_name] = config_path
|
|
61
61
|
|
|
@@ -587,12 +587,12 @@ def main():
|
|
|
587
587
|
|
|
588
588
|
class UserService:
|
|
589
589
|
'''Service for managing users.'''
|
|
590
|
-
|
|
590
|
+
|
|
591
591
|
def create_user(self, name: str, email: str):
|
|
592
592
|
'''Create a new user with the given name and email.'''
|
|
593
593
|
print(f"Creating user: {name} ({email})")
|
|
594
594
|
return {"name": name, "email": email}
|
|
595
|
-
|
|
595
|
+
|
|
596
596
|
def authenticate_user(self, email: str, password: str):
|
|
597
597
|
'''Authenticate user with email and password.'''
|
|
598
598
|
# Simple authentication logic
|
|
@@ -9,6 +9,7 @@ from loguru import logger
|
|
|
9
9
|
from ...core.database import ChromaVectorDatabase
|
|
10
10
|
from ...core.embeddings import create_embedding_function
|
|
11
11
|
from ...core.exceptions import ProjectNotFoundError
|
|
12
|
+
from ...core.indexer import SemanticIndexer
|
|
12
13
|
from ...core.project import ProjectManager
|
|
13
14
|
from ...core.search import SemanticSearchEngine
|
|
14
15
|
from ..didyoumean import create_enhanced_typer
|
|
@@ -258,6 +259,47 @@ async def run_search(
|
|
|
258
259
|
embedding_function=embedding_function,
|
|
259
260
|
)
|
|
260
261
|
|
|
262
|
+
# Create indexer for version check
|
|
263
|
+
indexer = SemanticIndexer(
|
|
264
|
+
database=database,
|
|
265
|
+
project_root=project_root,
|
|
266
|
+
file_extensions=config.file_extensions,
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
# Check if reindex is needed due to version upgrade
|
|
270
|
+
if config.auto_reindex_on_upgrade and indexer.needs_reindex_for_version():
|
|
271
|
+
from ..output import console
|
|
272
|
+
|
|
273
|
+
index_version = indexer.get_index_version()
|
|
274
|
+
from ... import __version__
|
|
275
|
+
|
|
276
|
+
if index_version:
|
|
277
|
+
console.print(
|
|
278
|
+
f"[yellow]⚠️ Index created with version {index_version} (current: {__version__})[/yellow]"
|
|
279
|
+
)
|
|
280
|
+
else:
|
|
281
|
+
console.print(
|
|
282
|
+
"[yellow]⚠️ Index version not found (legacy format detected)[/yellow]"
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
console.print(
|
|
286
|
+
"[yellow] Reindexing to take advantage of improvements...[/yellow]"
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
# Auto-reindex with progress
|
|
290
|
+
try:
|
|
291
|
+
indexed_count = await indexer.index_project(
|
|
292
|
+
force_reindex=True, show_progress=False
|
|
293
|
+
)
|
|
294
|
+
console.print(
|
|
295
|
+
f"[green]✓ Index updated to version {__version__} ({indexed_count} files reindexed)[/green]\n"
|
|
296
|
+
)
|
|
297
|
+
except Exception as e:
|
|
298
|
+
console.print(f"[red]✗ Reindexing failed: {e}[/red]")
|
|
299
|
+
console.print(
|
|
300
|
+
"[yellow] Continuing with existing index (may have outdated patterns)[/yellow]\n"
|
|
301
|
+
)
|
|
302
|
+
|
|
261
303
|
search_engine = SemanticSearchEngine(
|
|
262
304
|
database=database,
|
|
263
305
|
project_root=project_root,
|
|
@@ -9,6 +9,7 @@ from typing import Any
|
|
|
9
9
|
import typer
|
|
10
10
|
from loguru import logger
|
|
11
11
|
|
|
12
|
+
from ... import __version__
|
|
12
13
|
from ...core.database import ChromaVectorDatabase
|
|
13
14
|
from ...core.embeddings import create_embedding_function
|
|
14
15
|
from ...core.exceptions import ProjectNotFoundError
|
|
@@ -165,6 +166,10 @@ async def show_status(
|
|
|
165
166
|
index_stats = await indexer.get_indexing_stats()
|
|
166
167
|
db_stats = await database.get_stats()
|
|
167
168
|
|
|
169
|
+
# Get version information
|
|
170
|
+
index_version = indexer.get_index_version()
|
|
171
|
+
needs_reindex = indexer.needs_reindex_for_version()
|
|
172
|
+
|
|
168
173
|
# Compile status data
|
|
169
174
|
status_data = {
|
|
170
175
|
"project": {
|
|
@@ -181,6 +186,7 @@ async def show_status(
|
|
|
181
186
|
"max_chunk_size": config.max_chunk_size,
|
|
182
187
|
"cache_embeddings": config.cache_embeddings,
|
|
183
188
|
"watch_files": config.watch_files,
|
|
189
|
+
"auto_reindex_on_upgrade": config.auto_reindex_on_upgrade,
|
|
184
190
|
},
|
|
185
191
|
"index": {
|
|
186
192
|
"total_files": index_stats.get("total_indexable_files", 0),
|
|
@@ -189,6 +195,9 @@ async def show_status(
|
|
|
189
195
|
"languages": index_stats.get("languages", {}),
|
|
190
196
|
"index_size_mb": db_stats.index_size_mb,
|
|
191
197
|
"last_updated": db_stats.last_updated,
|
|
198
|
+
"index_version": index_version,
|
|
199
|
+
"current_version": __version__,
|
|
200
|
+
"needs_reindex": needs_reindex,
|
|
192
201
|
},
|
|
193
202
|
}
|
|
194
203
|
|
|
@@ -267,12 +276,36 @@ def _display_status(
|
|
|
267
276
|
console.print(f" Total Chunks: {index_data['total_chunks']}")
|
|
268
277
|
console.print(f" Index Size: {index_data['index_size_mb']:.2f} MB")
|
|
269
278
|
|
|
279
|
+
# Version information
|
|
280
|
+
index_version = index_data.get("index_version")
|
|
281
|
+
current_version = index_data.get("current_version", __version__)
|
|
282
|
+
needs_reindex = index_data.get("needs_reindex", False)
|
|
283
|
+
|
|
284
|
+
if index_version:
|
|
285
|
+
if needs_reindex:
|
|
286
|
+
console.print(
|
|
287
|
+
f" Version: [yellow]{index_version}[/yellow] (current: {current_version}) [yellow]⚠️ Reindex recommended[/yellow]"
|
|
288
|
+
)
|
|
289
|
+
else:
|
|
290
|
+
console.print(f" Version: [green]{index_version}[/green] (up to date)")
|
|
291
|
+
else:
|
|
292
|
+
console.print(
|
|
293
|
+
f" Version: [yellow]Not tracked[/yellow] (current: {current_version}) [yellow]⚠️ Reindex recommended[/yellow]"
|
|
294
|
+
)
|
|
295
|
+
|
|
270
296
|
if index_data["languages"]:
|
|
271
297
|
console.print(" Language Distribution:")
|
|
272
298
|
for lang, count in index_data["languages"].items():
|
|
273
299
|
console.print(f" {lang}: {count} chunks")
|
|
274
300
|
console.print()
|
|
275
301
|
|
|
302
|
+
# Show reindex recommendation if needed
|
|
303
|
+
if needs_reindex:
|
|
304
|
+
console.print(
|
|
305
|
+
"[yellow]💡 Tip: Run 'mcp-vector-search index' to reindex with the latest improvements[/yellow]"
|
|
306
|
+
)
|
|
307
|
+
console.print()
|
|
308
|
+
|
|
276
309
|
# Health check results
|
|
277
310
|
if "health" in status_data:
|
|
278
311
|
health_data = status_data["health"]
|
|
@@ -266,7 +266,6 @@ COMMON_TYPOS = {
|
|
|
266
266
|
"context": "mcp",
|
|
267
267
|
"m": "mcp", # Single letter shortcut
|
|
268
268
|
# Install command variations
|
|
269
|
-
"setup": "install",
|
|
270
269
|
"deploy": "install",
|
|
271
270
|
"add": "install",
|
|
272
271
|
"instal": "install", # Common typo
|
|
@@ -471,7 +470,7 @@ def add_common_suggestions(ctx: click.Context, command_name: str) -> None:
|
|
|
471
470
|
console.print("\n[yellow]Did you mean one of these?[/yellow]")
|
|
472
471
|
|
|
473
472
|
# Show up to 3 best matches
|
|
474
|
-
for cmd,
|
|
473
|
+
for cmd, _ratio in fuzzy_matches[:3]:
|
|
475
474
|
console.print(format_command_suggestion(cmd, show_examples=False))
|
|
476
475
|
|
|
477
476
|
# Show example for the best match
|
mcp_vector_search/cli/history.py
CHANGED
|
@@ -184,8 +184,9 @@ class SearchHistory:
|
|
|
184
184
|
|
|
185
185
|
self._save_favorites(favorites)
|
|
186
186
|
|
|
187
|
-
except Exception:
|
|
187
|
+
except Exception as e:
|
|
188
188
|
# Don't show error for this non-critical operation
|
|
189
|
+
logger.debug(f"Failed to update history ranking: {e}")
|
|
189
190
|
pass
|
|
190
191
|
|
|
191
192
|
def _load_history(self) -> list[dict[str, Any]]:
|
|
@@ -196,7 +197,8 @@ class SearchHistory:
|
|
|
196
197
|
try:
|
|
197
198
|
with open(self.history_file, encoding="utf-8") as f:
|
|
198
199
|
return json.load(f)
|
|
199
|
-
except Exception:
|
|
200
|
+
except Exception as e:
|
|
201
|
+
logger.debug(f"Failed to load history file: {e}")
|
|
200
202
|
return []
|
|
201
203
|
|
|
202
204
|
def _save_history(self, history: list[dict[str, Any]]) -> None:
|
|
@@ -212,7 +214,8 @@ class SearchHistory:
|
|
|
212
214
|
try:
|
|
213
215
|
with open(self.favorites_file, encoding="utf-8") as f:
|
|
214
216
|
return json.load(f)
|
|
215
|
-
except Exception:
|
|
217
|
+
except Exception as e:
|
|
218
|
+
logger.debug(f"Failed to load favorites file: {e}")
|
|
216
219
|
return []
|
|
217
220
|
|
|
218
221
|
def _save_favorites(self, favorites: list[dict[str, Any]]) -> None:
|
mcp_vector_search/cli/main.py
CHANGED
|
@@ -74,10 +74,10 @@ unfamiliar codebases, finding similar patterns, and integrating with AI tools.
|
|
|
74
74
|
)
|
|
75
75
|
|
|
76
76
|
# Import command functions for direct registration and aliases
|
|
77
|
-
from .commands.index import main as index_main
|
|
78
|
-
from .commands.install import demo as install_demo
|
|
79
|
-
from .commands.install import main as install_main
|
|
80
|
-
from .commands.status import main as status_main
|
|
77
|
+
from .commands.index import main as index_main # noqa: E402
|
|
78
|
+
from .commands.install import demo as install_demo # noqa: E402
|
|
79
|
+
from .commands.install import main as install_main # noqa: E402
|
|
80
|
+
from .commands.status import main as status_main # noqa: E402
|
|
81
81
|
|
|
82
82
|
# Note: config doesn't have a main function, it uses subcommands via config_app
|
|
83
83
|
app.command("install", help="🚀 Install mcp-vector-search in projects")(install_main)
|
|
@@ -377,8 +377,9 @@ def handle_command_error(ctx, param, value):
|
|
|
377
377
|
try:
|
|
378
378
|
project_root = ctx.obj.get("project_root") if ctx.obj else None
|
|
379
379
|
get_contextual_suggestions(project_root, command_name)
|
|
380
|
-
except Exception:
|
|
380
|
+
except Exception as e:
|
|
381
381
|
# If contextual suggestions fail, don't break the error flow
|
|
382
|
+
logger.debug(f"Failed to get contextual suggestions: {e}")
|
|
382
383
|
pass
|
|
383
384
|
raise
|
|
384
385
|
|
|
@@ -393,7 +394,8 @@ def help_contextual() -> None:
|
|
|
393
394
|
)
|
|
394
395
|
console.print("[dim]CLI-first semantic code search with MCP integration[/dim]")
|
|
395
396
|
get_contextual_suggestions(project_root)
|
|
396
|
-
except Exception:
|
|
397
|
+
except Exception as e:
|
|
398
|
+
logger.debug(f"Failed to show contextual help: {e}")
|
|
397
399
|
console.print(
|
|
398
400
|
"\n[dim]Use [bold]mcp-vector-search --help[/bold] for more information.[/dim]"
|
|
399
401
|
)
|
|
@@ -457,7 +459,10 @@ def cli_with_suggestions():
|
|
|
457
459
|
try:
|
|
458
460
|
project_root = Path.cwd()
|
|
459
461
|
get_contextual_suggestions(project_root, command_name)
|
|
460
|
-
except Exception:
|
|
462
|
+
except Exception as e:
|
|
463
|
+
logger.debug(
|
|
464
|
+
f"Failed to get contextual suggestions for error handling: {e}"
|
|
465
|
+
)
|
|
461
466
|
pass
|
|
462
467
|
|
|
463
468
|
sys.exit(2) # Exit with error code
|
|
@@ -73,8 +73,9 @@ class ContextualSuggestionProvider:
|
|
|
73
73
|
# For now, we'll assume false
|
|
74
74
|
state["has_recent_changes"] = False
|
|
75
75
|
|
|
76
|
-
except Exception:
|
|
76
|
+
except Exception as e:
|
|
77
77
|
# If we can't determine state, provide conservative defaults
|
|
78
|
+
logger.debug(f"Failed to determine project state for suggestions: {e}")
|
|
78
79
|
pass
|
|
79
80
|
|
|
80
81
|
return state
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Project-wide constants for MCP Vector Search.
|
|
2
|
+
|
|
3
|
+
This module contains all magic numbers and configuration constants
|
|
4
|
+
used throughout the application to improve maintainability and clarity.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Timeout Constants (in seconds)
|
|
8
|
+
SUBPROCESS_INSTALL_TIMEOUT = 120 # Timeout for package installation commands
|
|
9
|
+
SUBPROCESS_SHORT_TIMEOUT = 10 # Short timeout for quick commands (version checks, etc.)
|
|
10
|
+
SUBPROCESS_MCP_TIMEOUT = 30 # Timeout for MCP server operations
|
|
11
|
+
SUBPROCESS_TEST_TIMEOUT = 5 # Timeout for server test operations
|
|
12
|
+
CONNECTION_POOL_TIMEOUT = 30.0 # Connection pool acquisition timeout
|
|
13
|
+
|
|
14
|
+
# Chunking Constants
|
|
15
|
+
DEFAULT_CHUNK_SIZE = 50 # Default number of lines per code chunk
|
|
16
|
+
TEXT_CHUNK_SIZE = 30 # Number of lines per text/markdown chunk
|
|
17
|
+
SEARCH_RESULT_LIMIT = 20 # Default number of search results to return
|
|
18
|
+
|
|
19
|
+
# Threshold Constants
|
|
20
|
+
DEFAULT_SIMILARITY_THRESHOLD = 0.5 # Default similarity threshold for search (0.0-1.0)
|
|
21
|
+
HIGH_SIMILARITY_THRESHOLD = 0.75 # Higher threshold for more precise matches
|
|
22
|
+
|
|
23
|
+
# Cache Constants
|
|
24
|
+
DEFAULT_CACHE_SIZE = 256 # Default LRU cache size for file reads
|
|
@@ -108,6 +108,8 @@ DEFAULT_IGNORE_PATTERNS = [
|
|
|
108
108
|
".hg",
|
|
109
109
|
"__pycache__",
|
|
110
110
|
".pytest_cache",
|
|
111
|
+
".mypy_cache", # mypy type checking cache
|
|
112
|
+
".ruff_cache", # ruff linter cache
|
|
111
113
|
"node_modules",
|
|
112
114
|
".venv",
|
|
113
115
|
"venv",
|
|
@@ -120,6 +122,8 @@ DEFAULT_IGNORE_PATTERNS = [
|
|
|
120
122
|
"*.egg-info",
|
|
121
123
|
".DS_Store",
|
|
122
124
|
"Thumbs.db",
|
|
125
|
+
".claude-mpm", # Claude MPM directory
|
|
126
|
+
".mcp-vector-search", # Our own index directory
|
|
123
127
|
]
|
|
124
128
|
|
|
125
129
|
# File patterns to ignore
|
|
@@ -37,6 +37,10 @@ class ProjectConfig(BaseSettings):
|
|
|
37
37
|
max_cache_size: int = Field(
|
|
38
38
|
default=1000, gt=0, description="Maximum number of cached embeddings"
|
|
39
39
|
)
|
|
40
|
+
auto_reindex_on_upgrade: bool = Field(
|
|
41
|
+
default=True,
|
|
42
|
+
description="Automatically reindex when tool version is upgraded (minor/major versions)",
|
|
43
|
+
)
|
|
40
44
|
|
|
41
45
|
@field_validator("project_root", "index_path", mode="before")
|
|
42
46
|
@classmethod
|
|
@@ -483,7 +483,7 @@ class ChromaVectorDatabase(VectorDatabase):
|
|
|
483
483
|
return
|
|
484
484
|
|
|
485
485
|
# Check for HNSW index files that might be corrupted
|
|
486
|
-
|
|
486
|
+
self.persist_directory / "chroma-collections.parquet"
|
|
487
487
|
index_path = self.persist_directory / "index"
|
|
488
488
|
|
|
489
489
|
if index_path.exists():
|
|
@@ -60,16 +60,24 @@ class ComponentFactory:
|
|
|
60
60
|
def create_database(
|
|
61
61
|
config: ProjectConfig,
|
|
62
62
|
embedding_function: CodeBERTEmbeddingFunction,
|
|
63
|
-
use_pooling: bool =
|
|
63
|
+
use_pooling: bool = True, # Enable pooling by default for 13.6% performance boost
|
|
64
64
|
**pool_kwargs,
|
|
65
65
|
) -> VectorDatabase:
|
|
66
66
|
"""Create vector database."""
|
|
67
67
|
if use_pooling:
|
|
68
|
+
# Set default pool parameters if not provided
|
|
69
|
+
pool_defaults = {
|
|
70
|
+
"max_connections": 10,
|
|
71
|
+
"min_connections": 2,
|
|
72
|
+
"max_idle_time": 300.0,
|
|
73
|
+
}
|
|
74
|
+
pool_defaults.update(pool_kwargs)
|
|
75
|
+
|
|
68
76
|
return PooledChromaVectorDatabase(
|
|
69
77
|
persist_directory=config.index_path,
|
|
70
78
|
embedding_function=embedding_function,
|
|
71
79
|
collection_name="code_search",
|
|
72
|
-
**
|
|
80
|
+
**pool_defaults,
|
|
73
81
|
)
|
|
74
82
|
else:
|
|
75
83
|
return ChromaVectorDatabase(
|
|
@@ -124,7 +132,7 @@ class ComponentFactory:
|
|
|
124
132
|
@staticmethod
|
|
125
133
|
async def create_standard_components(
|
|
126
134
|
project_root: Path,
|
|
127
|
-
use_pooling: bool =
|
|
135
|
+
use_pooling: bool = True, # Enable pooling by default for performance
|
|
128
136
|
include_search_engine: bool = False,
|
|
129
137
|
include_auto_indexer: bool = False,
|
|
130
138
|
similarity_threshold: float = 0.7,
|
|
@@ -3,10 +3,13 @@
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
5
|
import os
|
|
6
|
+
from datetime import UTC, datetime
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
|
|
8
9
|
from loguru import logger
|
|
10
|
+
from packaging import version
|
|
9
11
|
|
|
12
|
+
from .. import __version__
|
|
10
13
|
from ..config.defaults import DEFAULT_IGNORE_PATTERNS
|
|
11
14
|
from ..parsers.registry import get_parser_registry
|
|
12
15
|
from ..utils.gitignore import create_gitignore_parser
|
|
@@ -40,9 +43,15 @@ class SemanticIndexer:
|
|
|
40
43
|
self.file_extensions = {ext.lower() for ext in file_extensions}
|
|
41
44
|
self.parser_registry = get_parser_registry()
|
|
42
45
|
self._ignore_patterns = set(DEFAULT_IGNORE_PATTERNS)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
|
|
47
|
+
# Safely get event loop for max_workers
|
|
48
|
+
try:
|
|
49
|
+
loop = asyncio.get_event_loop()
|
|
50
|
+
self.max_workers = max_workers or min(4, (loop.get_debug() and 1) or 4)
|
|
51
|
+
except RuntimeError:
|
|
52
|
+
# No event loop in current thread
|
|
53
|
+
self.max_workers = max_workers or 4
|
|
54
|
+
|
|
46
55
|
self.batch_size = batch_size
|
|
47
56
|
self._index_metadata_file = (
|
|
48
57
|
project_root / ".mcp-vector-search" / "index_metadata.json"
|
|
@@ -182,7 +191,13 @@ class SemanticIndexer:
|
|
|
182
191
|
|
|
183
192
|
try:
|
|
184
193
|
with open(self._index_metadata_file) as f:
|
|
185
|
-
|
|
194
|
+
data = json.load(f)
|
|
195
|
+
# Handle legacy format (just file_mtimes dict) and new format
|
|
196
|
+
if "file_mtimes" in data:
|
|
197
|
+
return data["file_mtimes"]
|
|
198
|
+
else:
|
|
199
|
+
# Legacy format - just return as-is
|
|
200
|
+
return data
|
|
186
201
|
except Exception as e:
|
|
187
202
|
logger.warning(f"Failed to load index metadata: {e}")
|
|
188
203
|
return {}
|
|
@@ -197,8 +212,15 @@ class SemanticIndexer:
|
|
|
197
212
|
# Ensure directory exists
|
|
198
213
|
self._index_metadata_file.parent.mkdir(parents=True, exist_ok=True)
|
|
199
214
|
|
|
215
|
+
# New metadata format with version tracking
|
|
216
|
+
data = {
|
|
217
|
+
"index_version": __version__,
|
|
218
|
+
"indexed_at": datetime.now(UTC).isoformat(),
|
|
219
|
+
"file_mtimes": metadata,
|
|
220
|
+
}
|
|
221
|
+
|
|
200
222
|
with open(self._index_metadata_file, "w") as f:
|
|
201
|
-
json.dump(
|
|
223
|
+
json.dump(data, f, indent=2)
|
|
202
224
|
except Exception as e:
|
|
203
225
|
logger.warning(f"Failed to save index metadata: {e}")
|
|
204
226
|
|
|
@@ -447,6 +469,59 @@ class SemanticIndexer:
|
|
|
447
469
|
"""
|
|
448
470
|
return self._ignore_patterns.copy()
|
|
449
471
|
|
|
472
|
+
def get_index_version(self) -> str | None:
|
|
473
|
+
"""Get the version of the tool that created the current index.
|
|
474
|
+
|
|
475
|
+
Returns:
|
|
476
|
+
Version string or None if not available
|
|
477
|
+
"""
|
|
478
|
+
if not self._index_metadata_file.exists():
|
|
479
|
+
return None
|
|
480
|
+
|
|
481
|
+
try:
|
|
482
|
+
with open(self._index_metadata_file) as f:
|
|
483
|
+
data = json.load(f)
|
|
484
|
+
return data.get("index_version")
|
|
485
|
+
except Exception as e:
|
|
486
|
+
logger.warning(f"Failed to read index version: {e}")
|
|
487
|
+
return None
|
|
488
|
+
|
|
489
|
+
def needs_reindex_for_version(self) -> bool:
|
|
490
|
+
"""Check if reindex is needed due to version upgrade.
|
|
491
|
+
|
|
492
|
+
Returns:
|
|
493
|
+
True if reindex is needed for version compatibility
|
|
494
|
+
"""
|
|
495
|
+
index_version = self.get_index_version()
|
|
496
|
+
|
|
497
|
+
if not index_version:
|
|
498
|
+
# No version recorded - this is either a new index or legacy format
|
|
499
|
+
# Reindex to establish version tracking
|
|
500
|
+
return True
|
|
501
|
+
|
|
502
|
+
try:
|
|
503
|
+
current = version.parse(__version__)
|
|
504
|
+
indexed = version.parse(index_version)
|
|
505
|
+
|
|
506
|
+
# Reindex on major or minor version change
|
|
507
|
+
# Patch versions (0.5.1 -> 0.5.2) don't require reindex
|
|
508
|
+
needs_reindex = (
|
|
509
|
+
current.major != indexed.major or current.minor != indexed.minor
|
|
510
|
+
)
|
|
511
|
+
|
|
512
|
+
if needs_reindex:
|
|
513
|
+
logger.info(
|
|
514
|
+
f"Version upgrade detected: {index_version} -> {__version__} "
|
|
515
|
+
f"(reindex recommended)"
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
return needs_reindex
|
|
519
|
+
|
|
520
|
+
except Exception as e:
|
|
521
|
+
logger.warning(f"Failed to compare versions: {e}")
|
|
522
|
+
# If we can't parse versions, be safe and reindex
|
|
523
|
+
return True
|
|
524
|
+
|
|
450
525
|
async def get_indexing_stats(self) -> dict:
|
|
451
526
|
"""Get statistics about the indexing process.
|
|
452
527
|
|