mcp-vector-search 0.12.6__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_vector_search/__init__.py +2 -2
- mcp_vector_search/analysis/__init__.py +64 -0
- mcp_vector_search/analysis/collectors/__init__.py +39 -0
- mcp_vector_search/analysis/collectors/base.py +164 -0
- mcp_vector_search/analysis/collectors/complexity.py +743 -0
- mcp_vector_search/analysis/metrics.py +341 -0
- mcp_vector_search/analysis/reporters/__init__.py +5 -0
- mcp_vector_search/analysis/reporters/console.py +222 -0
- mcp_vector_search/cli/commands/analyze.py +408 -0
- mcp_vector_search/cli/commands/chat.py +1262 -0
- mcp_vector_search/cli/commands/index.py +21 -3
- mcp_vector_search/cli/commands/init.py +13 -0
- mcp_vector_search/cli/commands/install.py +597 -335
- mcp_vector_search/cli/commands/install_old.py +8 -4
- mcp_vector_search/cli/commands/mcp.py +78 -6
- mcp_vector_search/cli/commands/reset.py +68 -26
- mcp_vector_search/cli/commands/search.py +30 -7
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +37 -2
- mcp_vector_search/cli/commands/uninstall.py +276 -357
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +276 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +714 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +311 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +180 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +2507 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +1313 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/didyoumean.py +22 -2
- mcp_vector_search/cli/main.py +115 -159
- mcp_vector_search/cli/output.py +24 -8
- mcp_vector_search/config/__init__.py +4 -0
- mcp_vector_search/config/default_thresholds.yaml +52 -0
- mcp_vector_search/config/settings.py +12 -0
- mcp_vector_search/config/thresholds.py +185 -0
- mcp_vector_search/core/auto_indexer.py +3 -3
- mcp_vector_search/core/boilerplate.py +186 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/database.py +369 -94
- mcp_vector_search/core/exceptions.py +11 -0
- mcp_vector_search/core/git_hooks.py +4 -4
- mcp_vector_search/core/indexer.py +221 -4
- mcp_vector_search/core/llm_client.py +751 -0
- mcp_vector_search/core/models.py +3 -0
- mcp_vector_search/core/project.py +17 -0
- mcp_vector_search/core/scheduler.py +11 -11
- mcp_vector_search/core/search.py +179 -29
- mcp_vector_search/mcp/server.py +24 -5
- mcp_vector_search/utils/__init__.py +2 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +66 -4
- mcp_vector_search/utils/timing.py +10 -6
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/METADATA +182 -52
- mcp_vector_search-1.0.3.dist-info/RECORD +97 -0
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/WHEEL +1 -1
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/entry_points.txt +1 -0
- mcp_vector_search/cli/commands/visualize.py +0 -1467
- mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/models.py
CHANGED
|
@@ -147,6 +147,9 @@ class SearchResult(BaseModel):
|
|
|
147
147
|
context_before: list[str] = Field(default=[], description="Lines before the match")
|
|
148
148
|
context_after: list[str] = Field(default=[], description="Lines after the match")
|
|
149
149
|
highlights: list[str] = Field(default=[], description="Highlighted terms")
|
|
150
|
+
file_missing: bool = Field(
|
|
151
|
+
default=False, description="True if file no longer exists (stale index)"
|
|
152
|
+
)
|
|
150
153
|
|
|
151
154
|
class Config:
|
|
152
155
|
arbitrary_types_allowed = True
|
|
@@ -107,6 +107,23 @@ class ProjectManager:
|
|
|
107
107
|
index_path = get_default_index_path(self.project_root)
|
|
108
108
|
index_path.mkdir(parents=True, exist_ok=True)
|
|
109
109
|
|
|
110
|
+
# Ensure .mcp-vector-search/ is in .gitignore
|
|
111
|
+
# This is a non-critical operation - failures are logged but don't block initialization
|
|
112
|
+
try:
|
|
113
|
+
from ..utils.gitignore_updater import ensure_gitignore_entry
|
|
114
|
+
|
|
115
|
+
ensure_gitignore_entry(
|
|
116
|
+
self.project_root,
|
|
117
|
+
pattern=".mcp-vector-search/",
|
|
118
|
+
comment="MCP Vector Search index directory",
|
|
119
|
+
)
|
|
120
|
+
except Exception as e:
|
|
121
|
+
# Log warning but continue initialization
|
|
122
|
+
logger.warning(f"Could not update .gitignore: {e}")
|
|
123
|
+
logger.info(
|
|
124
|
+
"Please manually add '.mcp-vector-search/' to your .gitignore file"
|
|
125
|
+
)
|
|
126
|
+
|
|
110
127
|
# Detect languages and files
|
|
111
128
|
detected_languages = self.detect_languages()
|
|
112
129
|
file_count = self.count_indexable_files(
|
|
@@ -73,7 +73,7 @@ class SchedulerManager:
|
|
|
73
73
|
project_root = str(self.project_root)
|
|
74
74
|
|
|
75
75
|
# Create wrapper script
|
|
76
|
-
script_content = f
|
|
76
|
+
script_content = f"""#!/bin/bash
|
|
77
77
|
# MCP Vector Search Auto-Reindex - {task_name}
|
|
78
78
|
cd "{project_root}" || exit 1
|
|
79
79
|
|
|
@@ -85,7 +85,7 @@ elif [ -f "{python_path}" ]; then
|
|
|
85
85
|
else
|
|
86
86
|
python3 -m mcp_vector_search auto-index check --auto-reindex --max-files 10
|
|
87
87
|
fi
|
|
88
|
-
|
|
88
|
+
"""
|
|
89
89
|
|
|
90
90
|
# Write script to temp file
|
|
91
91
|
script_dir = Path.home() / ".mcp-vector-search" / "scripts"
|
|
@@ -109,7 +109,7 @@ fi
|
|
|
109
109
|
|
|
110
110
|
# Get current crontab
|
|
111
111
|
try:
|
|
112
|
-
result = subprocess.run(
|
|
112
|
+
result = subprocess.run( # nosec B607
|
|
113
113
|
["crontab", "-l"], capture_output=True, text=True, check=True
|
|
114
114
|
)
|
|
115
115
|
current_crontab = result.stdout
|
|
@@ -125,7 +125,7 @@ fi
|
|
|
125
125
|
new_crontab = current_crontab + cron_entry
|
|
126
126
|
|
|
127
127
|
# Install new crontab
|
|
128
|
-
process = subprocess.Popen(
|
|
128
|
+
process = subprocess.Popen( # nosec B607
|
|
129
129
|
["crontab", "-"], stdin=subprocess.PIPE, text=True
|
|
130
130
|
)
|
|
131
131
|
process.communicate(input=new_crontab)
|
|
@@ -148,7 +148,7 @@ fi
|
|
|
148
148
|
try:
|
|
149
149
|
# Get current crontab
|
|
150
150
|
try:
|
|
151
|
-
result = subprocess.run(
|
|
151
|
+
result = subprocess.run( # nosec B607
|
|
152
152
|
["crontab", "-l"], capture_output=True, text=True, check=True
|
|
153
153
|
)
|
|
154
154
|
current_crontab = result.stdout
|
|
@@ -163,13 +163,13 @@ fi
|
|
|
163
163
|
|
|
164
164
|
# Install new crontab
|
|
165
165
|
if new_crontab.strip():
|
|
166
|
-
process = subprocess.Popen(
|
|
166
|
+
process = subprocess.Popen( # nosec B607
|
|
167
167
|
["crontab", "-"], stdin=subprocess.PIPE, text=True
|
|
168
168
|
)
|
|
169
169
|
process.communicate(input=new_crontab)
|
|
170
170
|
else:
|
|
171
171
|
# Remove crontab entirely if empty
|
|
172
|
-
subprocess.run(["crontab", "-r"], check=False)
|
|
172
|
+
subprocess.run(["crontab", "-r"], check=False) # nosec B607
|
|
173
173
|
|
|
174
174
|
# Remove script file
|
|
175
175
|
script_dir = Path.home() / ".mcp-vector-search" / "scripts"
|
|
@@ -191,7 +191,7 @@ fi
|
|
|
191
191
|
project_root = str(self.project_root)
|
|
192
192
|
|
|
193
193
|
# Create PowerShell script
|
|
194
|
-
script_content = f
|
|
194
|
+
script_content = f"""# MCP Vector Search Auto-Reindex - {task_name}
|
|
195
195
|
Set-Location "{project_root}"
|
|
196
196
|
|
|
197
197
|
try {{
|
|
@@ -205,7 +205,7 @@ try {{
|
|
|
205
205
|
}} catch {{
|
|
206
206
|
# Silently ignore errors
|
|
207
207
|
}}
|
|
208
|
-
|
|
208
|
+
"""
|
|
209
209
|
|
|
210
210
|
# Write script
|
|
211
211
|
script_dir = Path.home() / ".mcp-vector-search" / "scripts"
|
|
@@ -302,7 +302,7 @@ try {{
|
|
|
302
302
|
def _get_cron_status(self, task_name: str) -> dict:
|
|
303
303
|
"""Get cron job status."""
|
|
304
304
|
try:
|
|
305
|
-
result = subprocess.run(
|
|
305
|
+
result = subprocess.run( # nosec B607
|
|
306
306
|
["crontab", "-l"], capture_output=True, text=True, check=True
|
|
307
307
|
)
|
|
308
308
|
|
|
@@ -315,7 +315,7 @@ try {{
|
|
|
315
315
|
def _get_windows_task_status(self, task_name: str) -> dict:
|
|
316
316
|
"""Get Windows task status."""
|
|
317
317
|
try:
|
|
318
|
-
result = subprocess.run(
|
|
318
|
+
result = subprocess.run( # nosec B607
|
|
319
319
|
["schtasks", "/query", "/tn", task_name], capture_output=True, text=True
|
|
320
320
|
)
|
|
321
321
|
|
mcp_vector_search/core/search.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Semantic search engine for MCP Vector Search."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import re
|
|
4
5
|
import time
|
|
5
6
|
from collections import OrderedDict
|
|
@@ -11,8 +12,9 @@ from loguru import logger
|
|
|
11
12
|
|
|
12
13
|
from ..config.constants import DEFAULT_CACHE_SIZE
|
|
13
14
|
from .auto_indexer import AutoIndexer, SearchTriggeredIndexer
|
|
15
|
+
from .boilerplate import BoilerplateFilter
|
|
14
16
|
from .database import VectorDatabase
|
|
15
|
-
from .exceptions import SearchError
|
|
17
|
+
from .exceptions import RustPanicError, SearchError
|
|
16
18
|
from .models import SearchResult
|
|
17
19
|
|
|
18
20
|
|
|
@@ -67,6 +69,7 @@ class SemanticSearchEngine:
|
|
|
67
69
|
_BOOST_SHALLOW_PATH = 0.02
|
|
68
70
|
_PENALTY_TEST_FILE = -0.02
|
|
69
71
|
_PENALTY_DEEP_PATH = -0.01
|
|
72
|
+
_PENALTY_BOILERPLATE = -0.15
|
|
70
73
|
|
|
71
74
|
def __init__(
|
|
72
75
|
self,
|
|
@@ -106,6 +109,156 @@ class SemanticSearchEngine:
|
|
|
106
109
|
self._last_health_check: float = 0.0
|
|
107
110
|
self._health_check_interval: float = 60.0
|
|
108
111
|
|
|
112
|
+
# Boilerplate filter for smart result ranking
|
|
113
|
+
self._boilerplate_filter = BoilerplateFilter()
|
|
114
|
+
|
|
115
|
+
@staticmethod
|
|
116
|
+
def _is_rust_panic_error(error: Exception) -> bool:
|
|
117
|
+
"""Detect ChromaDB Rust panic errors.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
error: Exception to check
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
True if this is a Rust panic error
|
|
124
|
+
"""
|
|
125
|
+
error_msg = str(error).lower()
|
|
126
|
+
|
|
127
|
+
# Check for the specific Rust panic pattern
|
|
128
|
+
# "range start index X out of range for slice of length Y"
|
|
129
|
+
if "range start index" in error_msg and "out of range" in error_msg:
|
|
130
|
+
return True
|
|
131
|
+
|
|
132
|
+
# Check for other Rust panic indicators
|
|
133
|
+
rust_panic_patterns = [
|
|
134
|
+
"rust panic",
|
|
135
|
+
"pyo3_runtime.panicexception",
|
|
136
|
+
"thread 'tokio-runtime-worker' panicked",
|
|
137
|
+
"rust/sqlite/src/db.rs", # Specific to the known ChromaDB issue
|
|
138
|
+
]
|
|
139
|
+
|
|
140
|
+
return any(pattern in error_msg for pattern in rust_panic_patterns)
|
|
141
|
+
|
|
142
|
+
@staticmethod
|
|
143
|
+
def _is_corruption_error(error: Exception) -> bool:
|
|
144
|
+
"""Detect index corruption errors.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
error: Exception to check
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
True if this is a corruption error
|
|
151
|
+
"""
|
|
152
|
+
error_msg = str(error).lower()
|
|
153
|
+
|
|
154
|
+
corruption_indicators = [
|
|
155
|
+
"pickle",
|
|
156
|
+
"unpickling",
|
|
157
|
+
"eof",
|
|
158
|
+
"ran out of input",
|
|
159
|
+
"hnsw",
|
|
160
|
+
"deserialize",
|
|
161
|
+
"corrupt",
|
|
162
|
+
]
|
|
163
|
+
|
|
164
|
+
return any(indicator in error_msg for indicator in corruption_indicators)
|
|
165
|
+
|
|
166
|
+
async def _search_with_retry(
|
|
167
|
+
self,
|
|
168
|
+
query: str,
|
|
169
|
+
limit: int,
|
|
170
|
+
filters: dict[str, Any] | None,
|
|
171
|
+
threshold: float,
|
|
172
|
+
max_retries: int = 3,
|
|
173
|
+
) -> list[SearchResult]:
|
|
174
|
+
"""Execute search with retry logic and exponential backoff.
|
|
175
|
+
|
|
176
|
+
Args:
|
|
177
|
+
query: Processed search query
|
|
178
|
+
limit: Maximum number of results
|
|
179
|
+
filters: Optional filters
|
|
180
|
+
threshold: Similarity threshold
|
|
181
|
+
max_retries: Maximum retry attempts (default: 3)
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
List of search results
|
|
185
|
+
|
|
186
|
+
Raises:
|
|
187
|
+
RustPanicError: If Rust panic persists after retries
|
|
188
|
+
SearchError: If search fails for other reasons
|
|
189
|
+
"""
|
|
190
|
+
last_error = None
|
|
191
|
+
backoff_delays = [0, 0.1, 0.5] # Immediate, 100ms, 500ms
|
|
192
|
+
|
|
193
|
+
for attempt in range(max_retries):
|
|
194
|
+
try:
|
|
195
|
+
# Add delay for retries (exponential backoff)
|
|
196
|
+
if attempt > 0 and backoff_delays[attempt] > 0:
|
|
197
|
+
await asyncio.sleep(backoff_delays[attempt])
|
|
198
|
+
logger.debug(
|
|
199
|
+
f"Retrying search after {backoff_delays[attempt]}s delay (attempt {attempt + 1}/{max_retries})"
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
# Perform the actual search
|
|
203
|
+
results = await self.database.search(
|
|
204
|
+
query=query,
|
|
205
|
+
limit=limit,
|
|
206
|
+
filters=filters,
|
|
207
|
+
similarity_threshold=threshold,
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
# Success! If we had retries, log that we recovered
|
|
211
|
+
if attempt > 0:
|
|
212
|
+
logger.info(
|
|
213
|
+
f"Search succeeded after {attempt + 1} attempts (recovered from transient error)"
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
return results
|
|
217
|
+
|
|
218
|
+
except BaseException as e:
|
|
219
|
+
# Re-raise system exceptions we should never catch
|
|
220
|
+
if isinstance(e, (KeyboardInterrupt, SystemExit, GeneratorExit)):
|
|
221
|
+
raise
|
|
222
|
+
|
|
223
|
+
last_error = e
|
|
224
|
+
|
|
225
|
+
# Check if this is a Rust panic
|
|
226
|
+
if self._is_rust_panic_error(e):
|
|
227
|
+
logger.warning(
|
|
228
|
+
f"ChromaDB Rust panic detected (attempt {attempt + 1}/{max_retries}): {e}"
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# If this is the last retry, escalate to corruption recovery
|
|
232
|
+
if attempt == max_retries - 1:
|
|
233
|
+
logger.error(
|
|
234
|
+
"Rust panic persisted after all retries - index may be corrupted"
|
|
235
|
+
)
|
|
236
|
+
raise RustPanicError(
|
|
237
|
+
"ChromaDB Rust panic detected. The HNSW index may be corrupted. "
|
|
238
|
+
"Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
|
|
239
|
+
) from e
|
|
240
|
+
|
|
241
|
+
# Otherwise, continue to next retry
|
|
242
|
+
continue
|
|
243
|
+
|
|
244
|
+
# Check for general corruption
|
|
245
|
+
elif self._is_corruption_error(e):
|
|
246
|
+
logger.error(f"Index corruption detected: {e}")
|
|
247
|
+
raise SearchError(
|
|
248
|
+
"Index corruption detected. Please run 'mcp-vector-search reset' "
|
|
249
|
+
"followed by 'mcp-vector-search index' to rebuild."
|
|
250
|
+
) from e
|
|
251
|
+
|
|
252
|
+
# Some other error - don't retry, just fail
|
|
253
|
+
else:
|
|
254
|
+
logger.error(f"Search failed: {e}")
|
|
255
|
+
raise SearchError(f"Search failed: {e}") from e
|
|
256
|
+
|
|
257
|
+
# Should never reach here, but just in case
|
|
258
|
+
raise SearchError(
|
|
259
|
+
f"Search failed after {max_retries} retries: {last_error}"
|
|
260
|
+
) from last_error
|
|
261
|
+
|
|
109
262
|
async def search(
|
|
110
263
|
self,
|
|
111
264
|
query: str,
|
|
@@ -162,12 +315,12 @@ class SemanticSearchEngine:
|
|
|
162
315
|
# Preprocess query
|
|
163
316
|
processed_query = self._preprocess_query(query)
|
|
164
317
|
|
|
165
|
-
# Perform vector search
|
|
166
|
-
results = await self.
|
|
318
|
+
# Perform vector search with retry logic
|
|
319
|
+
results = await self._search_with_retry(
|
|
167
320
|
query=processed_query,
|
|
168
321
|
limit=limit,
|
|
169
322
|
filters=filters,
|
|
170
|
-
|
|
323
|
+
threshold=threshold,
|
|
171
324
|
)
|
|
172
325
|
|
|
173
326
|
# Post-process results
|
|
@@ -184,32 +337,13 @@ class SemanticSearchEngine:
|
|
|
184
337
|
)
|
|
185
338
|
return ranked_results
|
|
186
339
|
|
|
340
|
+
except (RustPanicError, SearchError):
|
|
341
|
+
# These errors are already properly formatted with user guidance
|
|
342
|
+
raise
|
|
187
343
|
except Exception as e:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
indicator in error_msg
|
|
192
|
-
for indicator in [
|
|
193
|
-
"pickle",
|
|
194
|
-
"unpickling",
|
|
195
|
-
"eof",
|
|
196
|
-
"ran out of input",
|
|
197
|
-
"hnsw",
|
|
198
|
-
"index",
|
|
199
|
-
"deserialize",
|
|
200
|
-
"corrupt",
|
|
201
|
-
]
|
|
202
|
-
):
|
|
203
|
-
logger.error(f"Index corruption detected during search: {e}")
|
|
204
|
-
logger.info(
|
|
205
|
-
"The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it."
|
|
206
|
-
)
|
|
207
|
-
raise SearchError(
|
|
208
|
-
"Index corruption detected. Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
|
|
209
|
-
) from e
|
|
210
|
-
else:
|
|
211
|
-
logger.error(f"Search failed for query '{query}': {e}")
|
|
212
|
-
raise SearchError(f"Search failed: {e}") from e
|
|
344
|
+
# Unexpected error - wrap it in SearchError
|
|
345
|
+
logger.error(f"Unexpected search error for query '{query}': {e}")
|
|
346
|
+
raise SearchError(f"Search failed: {e}") from e
|
|
213
347
|
|
|
214
348
|
async def search_similar(
|
|
215
349
|
self,
|
|
@@ -470,6 +604,11 @@ class SemanticSearchEngine:
|
|
|
470
604
|
result.context_before = context_before
|
|
471
605
|
result.context_after = context_after
|
|
472
606
|
|
|
607
|
+
except FileNotFoundError:
|
|
608
|
+
# File was deleted since indexing - silently skip context
|
|
609
|
+
# This is normal when index is stale; use --force to reindex
|
|
610
|
+
logger.debug(f"File no longer exists (stale index): {result.file_path}")
|
|
611
|
+
result.file_missing = True # Mark for potential filtering
|
|
473
612
|
except Exception as e:
|
|
474
613
|
logger.warning(f"Failed to get context for {result.file_path}: {e}")
|
|
475
614
|
|
|
@@ -562,6 +701,17 @@ class SemanticSearchEngine:
|
|
|
562
701
|
elif path_depth > 5:
|
|
563
702
|
score += self._PENALTY_DEEP_PATH
|
|
564
703
|
|
|
704
|
+
# Factor 7: Boilerplate penalty (penalize common boilerplate patterns)
|
|
705
|
+
# Apply penalty to function names (constructors, lifecycle methods, etc.)
|
|
706
|
+
if result.function_name:
|
|
707
|
+
boilerplate_penalty = self._boilerplate_filter.get_penalty(
|
|
708
|
+
name=result.function_name,
|
|
709
|
+
language=result.language,
|
|
710
|
+
query=query,
|
|
711
|
+
penalty=self._PENALTY_BOILERPLATE,
|
|
712
|
+
)
|
|
713
|
+
score += boilerplate_penalty
|
|
714
|
+
|
|
565
715
|
# Ensure score doesn't exceed 1.0
|
|
566
716
|
result.similarity_score = min(1.0, score)
|
|
567
717
|
|
mcp_vector_search/mcp/server.py
CHANGED
|
@@ -38,11 +38,28 @@ class MCPVectorSearchServer:
|
|
|
38
38
|
"""Initialize the MCP server.
|
|
39
39
|
|
|
40
40
|
Args:
|
|
41
|
-
project_root: Project root directory. If None, will auto-detect
|
|
41
|
+
project_root: Project root directory. If None, will auto-detect from:
|
|
42
|
+
1. PROJECT_ROOT or MCP_PROJECT_ROOT environment variable
|
|
43
|
+
2. Current working directory
|
|
42
44
|
enable_file_watching: Enable file watching for automatic reindexing.
|
|
43
45
|
If None, checks MCP_ENABLE_FILE_WATCHING env var (default: True).
|
|
44
46
|
"""
|
|
45
|
-
|
|
47
|
+
# Auto-detect project root from environment or current directory
|
|
48
|
+
if project_root is None:
|
|
49
|
+
# Priority 1: MCP_PROJECT_ROOT (new standard)
|
|
50
|
+
# Priority 2: PROJECT_ROOT (legacy)
|
|
51
|
+
# Priority 3: Current working directory
|
|
52
|
+
env_project_root = os.getenv("MCP_PROJECT_ROOT") or os.getenv(
|
|
53
|
+
"PROJECT_ROOT"
|
|
54
|
+
)
|
|
55
|
+
if env_project_root:
|
|
56
|
+
project_root = Path(env_project_root).resolve()
|
|
57
|
+
logger.info(f"Using project root from environment: {project_root}")
|
|
58
|
+
else:
|
|
59
|
+
project_root = Path.cwd()
|
|
60
|
+
logger.info(f"Using current directory as project root: {project_root}")
|
|
61
|
+
|
|
62
|
+
self.project_root = project_root
|
|
46
63
|
self.project_manager = ProjectManager(self.project_root)
|
|
47
64
|
self.search_engine: SemanticSearchEngine | None = None
|
|
48
65
|
self.file_watcher: FileWatcher | None = None
|
|
@@ -397,9 +414,11 @@ class MCPVectorSearchServer:
|
|
|
397
414
|
"languages": config.languages,
|
|
398
415
|
"total_chunks": stats.total_chunks,
|
|
399
416
|
"total_files": stats.total_files,
|
|
400
|
-
"index_size":
|
|
401
|
-
|
|
402
|
-
|
|
417
|
+
"index_size": (
|
|
418
|
+
f"{stats.index_size_mb:.2f} MB"
|
|
419
|
+
if hasattr(stats, "index_size_mb")
|
|
420
|
+
else "Unknown"
|
|
421
|
+
),
|
|
403
422
|
}
|
|
404
423
|
else:
|
|
405
424
|
status_info = {
|
|
@@ -6,6 +6,7 @@ from .gitignore import (
|
|
|
6
6
|
create_gitignore_parser,
|
|
7
7
|
is_path_gitignored,
|
|
8
8
|
)
|
|
9
|
+
from .gitignore_updater import ensure_gitignore_entry
|
|
9
10
|
from .timing import (
|
|
10
11
|
PerformanceProfiler,
|
|
11
12
|
SearchProfiler,
|
|
@@ -24,6 +25,7 @@ __all__ = [
|
|
|
24
25
|
"GitignorePattern",
|
|
25
26
|
"create_gitignore_parser",
|
|
26
27
|
"is_path_gitignored",
|
|
28
|
+
"ensure_gitignore_entry",
|
|
27
29
|
# Timing utilities
|
|
28
30
|
"PerformanceProfiler",
|
|
29
31
|
"TimingResult",
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""Gitignore file update utilities for automatic .gitignore entry management."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def ensure_gitignore_entry(
    project_root: Path,
    pattern: str = ".mcp-vector-search/",
    comment: str | None = "MCP Vector Search index directory",
    create_if_missing: bool = True,
) -> bool:
    """Ensure a pattern exists in the project's .gitignore file.

    Design decision: non-blocking operation
    ---------------------------------------
    Updating .gitignore is a convenience, not a requirement, so this function
    never raises: every failure is logged as a warning and reported by
    returning False, letting project initialization continue.

    Pattern detection
    -----------------
    Leading and trailing slashes are ignored when comparing, and a trailing
    ``/*`` is accepted, so for the default pattern all of these count as
    "already present":

    - ``.mcp-vector-search/``
    - ``.mcp-vector-search``
    - ``.mcp-vector-search/*``
    - ``/.mcp-vector-search/``

    Edge cases handled
    ------------------
    1. Not a git repository (no ``.git``) -> skip, return False
    2. .gitignore missing -> create it (unless ``create_if_missing`` is False)
    3. .gitignore empty or whitespace-only -> overwrite with the entry
    4. Pattern or an equivalent already present -> return True, no write
    5. Negation line (``!...``) mentioning the pattern -> warn, return False
    6. File is not valid UTF-8 -> decoded leniently for *inspection only*
    7. Permission or other errors -> warn, return False

    An existing non-empty file is never rewritten: the new entry is appended,
    so the user's original bytes (including undecodable ones) and line-ending
    style are left untouched.

    Args:
        project_root: Project root directory (must exist).
        pattern: Pattern to add to .gitignore (default: .mcp-vector-search/).
        comment: Optional comment written on the line before the pattern.
        create_if_missing: Create .gitignore if it doesn't exist
            (default: True).

    Returns:
        True if the pattern was added or already exists. False if the update
        was skipped (not a git repository, file missing with
        ``create_if_missing=False``, conflicting negation pattern) or failed.

    Examples:
        >>> # Basic usage during project initialization
        >>> ensure_gitignore_entry(Path("/path/to/project"))
        True

        >>> # Custom pattern with custom comment
        >>> ensure_gitignore_entry(
        ...     Path("/path/to/project"),
        ...     pattern=".custom-dir/",
        ...     comment="Custom tool directory"
        ... )
        True

        >>> # Don't create .gitignore if missing
        >>> ensure_gitignore_entry(
        ...     Path("/path/to/project"),
        ...     create_if_missing=False
        ... )
        False
    """
    gitignore_path = project_root / ".gitignore"

    # Only create/modify .gitignore in git repositories to avoid polluting
    # non-git projects.
    if not (project_root / ".git").exists():
        logger.debug(
            "Not a git repository (no .git directory), skipping .gitignore update"
        )
        return False

    # Text used when the file is created or was empty.
    fresh_entry = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"

    try:
        if not gitignore_path.exists():
            if not create_if_missing:
                logger.debug(".gitignore does not exist and create_if_missing=False")
                return False

            gitignore_path.write_text(fresh_entry, encoding="utf-8")
            logger.info(f"Created .gitignore with {pattern} entry")
            return True

        # Read raw bytes so newlines are not translated and a lenient decode
        # can be used for inspection without ever being written back.
        raw = gitignore_path.read_bytes()
        try:
            content = raw.decode("utf-8")
        except UnicodeDecodeError:
            logger.debug("UTF-8 decode failed, trying with error replacement")
            content = raw.decode("utf-8", errors="replace")

        if not content.strip():
            gitignore_path.write_text(fresh_entry, encoding="utf-8")
            logger.info(f"Added {pattern} to empty .gitignore")
            return True

        normalized_pattern = pattern.rstrip("/").lstrip("/")
        equivalent_forms = (normalized_pattern, normalized_pattern + "/*")

        # Normalize CRLF / CR so the file is split the same way a
        # universal-newlines text read would split it.
        lines = content.replace("\r\n", "\n").replace("\r", "\n").split("\n")

        for line in lines:
            stripped_line = line.strip()
            # Skip comments and empty lines
            if not stripped_line or stripped_line.startswith("#"):
                continue

            # A negation pattern signals explicit intent to track the path,
            # so the user's configuration wins.
            if stripped_line.startswith("!") and normalized_pattern in stripped_line:
                logger.warning(
                    f".gitignore contains negation pattern: {stripped_line}. "
                    f"This indicates you want to track {pattern} in git. "
                    "Skipping automatic entry to respect your configuration."
                )
                return False

            if stripped_line.rstrip("/").lstrip("/") in equivalent_forms:
                logger.debug(f"Pattern already exists in .gitignore: {stripped_line}")
                return True

        # Pattern doesn't exist: append it using the file's own line ending.
        eol = "\r\n" if "\r\n" in content else "\n"
        addition = "" if content.endswith(("\n", "\r")) else eol
        addition += eol  # blank line before the entry for visual separation
        if comment:
            addition += f"# {comment}{eol}"
        addition += f"{pattern}{eol}"

        # newline="" writes the chosen line endings verbatim.
        with gitignore_path.open("a", encoding="utf-8", newline="") as handle:
            handle.write(addition)

        logger.info(f"Added {pattern} to .gitignore")
        return True

    except PermissionError:
        # Read-only .gitignore or protected directory
        logger.warning(
            f"Cannot update .gitignore: Permission denied. "
            f"Please manually add '{pattern}' to your .gitignore file at {gitignore_path}"
        )
        return False
    except Exception as e:
        # Catch-all for unexpected errors (don't block initialization)
        logger.warning(
            f"Failed to update .gitignore: {e}. "
            f"Please manually add '{pattern}' to your .gitignore"
        )
        return False
|