mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +3 -2
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/config.py +88 -40
- mcp_vector_search/cli/commands/index.py +198 -52
- mcp_vector_search/cli/commands/init.py +472 -58
- mcp_vector_search/cli/commands/install.py +284 -0
- mcp_vector_search/cli/commands/mcp.py +495 -0
- mcp_vector_search/cli/commands/search.py +241 -87
- mcp_vector_search/cli/commands/status.py +184 -58
- mcp_vector_search/cli/commands/watch.py +34 -35
- mcp_vector_search/cli/didyoumean.py +184 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +292 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +163 -26
- mcp_vector_search/cli/output.py +63 -45
- mcp_vector_search/config/defaults.py +50 -36
- mcp_vector_search/config/settings.py +49 -35
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/connection_pool.py +322 -0
- mcp_vector_search/core/database.py +335 -25
- mcp_vector_search/core/embeddings.py +73 -29
- mcp_vector_search/core/exceptions.py +19 -2
- mcp_vector_search/core/factory.py +310 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +237 -73
- mcp_vector_search/core/models.py +21 -19
- mcp_vector_search/core/project.py +73 -58
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +574 -86
- mcp_vector_search/core/watcher.py +48 -46
- mcp_vector_search/mcp/__init__.py +4 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +701 -0
- mcp_vector_search/parsers/base.py +30 -31
- mcp_vector_search/parsers/javascript.py +74 -48
- mcp_vector_search/parsers/python.py +57 -49
- mcp_vector_search/parsers/registry.py +47 -32
- mcp_vector_search/parsers/text.py +179 -0
- mcp_vector_search/utils/__init__.py +40 -0
- mcp_vector_search/utils/gitignore.py +229 -0
- mcp_vector_search/utils/timing.py +334 -0
- mcp_vector_search/utils/version.py +47 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
- mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""Timing utilities for performance measurement and optimization."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import statistics
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from contextlib import asynccontextmanager, contextmanager
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class TimingResult:
    """One recorded measurement for a single named operation.

    ``duration`` is kept in seconds; the read-only properties expose the
    same value converted to smaller units.
    """

    operation: str
    duration: float  # elapsed time, expressed in seconds
    timestamp: float
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def duration_ms(self) -> float:
        """Return the elapsed time in milliseconds."""
        seconds = self.duration
        return seconds * 1000

    @property
    def duration_us(self) -> float:
        """Return the elapsed time in microseconds."""
        seconds = self.duration
        return seconds * 1_000_000
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class PerformanceProfiler:
    """Collect named timing measurements and summarise them.

    Measurements are appended to ``self.results`` as ``TimingResult``
    objects, either through the explicit ``start_timer``/``stop_timer`` pair
    or through the ``time_operation``/``time_async_operation`` context
    managers.
    """

    def __init__(self, name: str = "default"):
        """Create an empty profiler labelled ``name`` (used in reports)."""
        self.name = name
        self.results: list[TimingResult] = []
        # operation name -> perf_counter() value captured by start_timer
        self._active_timers: dict[str, float] = {}
        # Depth of currently open context-managed timings; it only drives
        # the indentation of the debug log lines.
        self._nested_level = 0

    def start_timer(self, operation: str) -> None:
        """Start (or restart, with a warning) the timer for ``operation``."""
        if operation in self._active_timers:
            logger.warning(f"Timer '{operation}' already active, overwriting")
        self._active_timers[operation] = time.perf_counter()

    def stop_timer(
        self, operation: str, metadata: dict[str, Any] | None = None
    ) -> TimingResult:
        """Stop the timer for ``operation`` and record the measurement.

        Args:
            operation: Name previously passed to ``start_timer``.
            metadata: Optional extra data stored with the result.

        Returns:
            The recorded ``TimingResult``.

        Raises:
            ValueError: If no timer is active for ``operation``.
        """
        if operation not in self._active_timers:
            raise ValueError(f"Timer '{operation}' not found or not started")

        start_time = self._active_timers.pop(operation)
        duration = time.perf_counter() - start_time

        result = TimingResult(
            operation=operation,
            duration=duration,
            timestamp=time.time(),
            # Snapshot the dict so results never alias the caller's object.
            metadata=dict(metadata) if metadata else {},
        )

        self.results.append(result)
        return result

    def _begin_timed_block(self, operation: str) -> float:
        """Log the start of a context-managed timing and return its start time."""
        indent = " " * self._nested_level
        logger.debug(f"{indent}⏱️ Starting: {operation}")

        self._nested_level += 1
        return time.perf_counter()

    def _finish_timed_block(
        self, operation: str, start_time: float, metadata: dict[str, Any] | None
    ) -> None:
        """Record the result of a context-managed timing and log completion."""
        duration = time.perf_counter() - start_time
        self._nested_level -= 1

        self.results.append(
            TimingResult(
                operation=operation,
                duration=duration,
                timestamp=time.time(),
                # Copied at exit, so changes made inside the block are kept
                # while a dict reused across calls is not shared by results.
                metadata=dict(metadata) if metadata else {},
            )
        )

        indent = " " * self._nested_level
        logger.debug(f"{indent}✅ Completed: {operation} ({duration * 1000:.2f}ms)")

    @contextmanager
    def time_operation(self, operation: str, metadata: dict[str, Any] | None = None):
        """Context manager that times the enclosed block.

        The measurement is recorded even when the block raises.
        """
        start_time = self._begin_timed_block(operation)
        try:
            yield
        finally:
            self._finish_timed_block(operation, start_time, metadata)

    @asynccontextmanager
    async def time_async_operation(
        self, operation: str, metadata: dict[str, Any] | None = None
    ):
        """Async counterpart of ``time_operation``.

        The measurement is recorded even when the block raises.
        """
        start_time = self._begin_timed_block(operation)
        try:
            yield
        finally:
            self._finish_timed_block(operation, start_time, metadata)

    def get_stats(self, operation: str | None = None) -> dict[str, Any]:
        """Return summary statistics (in seconds) for recorded durations.

        Args:
            operation: Restrict the statistics to this operation name. ``None``
                (the default) aggregates every recorded result.

        Returns:
            A dict with ``count``, ``total``, ``mean``, ``median``, ``min``,
            ``max``, ``std_dev``, ``p95`` and ``p99``, or an empty dict when
            nothing matches.
        """
        # Compare against None explicitly so an operation named "" is
        # filtered like any other name instead of meaning "everything".
        durations = [
            r.duration
            for r in self.results
            if operation is None or r.operation == operation
        ]

        if not durations:
            return {}

        sample_count = len(durations)
        longest = max(durations)

        return {
            "count": sample_count,
            "total": sum(durations),
            "mean": statistics.mean(durations),
            "median": statistics.median(durations),
            "min": min(durations),
            "max": longest,
            # stdev needs at least two data points.
            "std_dev": statistics.stdev(durations) if sample_count > 1 else 0.0,
            # quantiles(n=20) yields 19 cut points; the last one is the 95th
            # percentile. With too few samples fall back to the maximum.
            "p95": statistics.quantiles(durations, n=20)[18]
            if sample_count >= 20
            else longest,
            # quantiles(n=100) yields 99 cut points; the last one is the 99th.
            "p99": statistics.quantiles(durations, n=100)[98]
            if sample_count >= 100
            else longest,
        }

    def get_operation_breakdown(self) -> dict[str, dict[str, Any]]:
        """Return ``get_stats`` output keyed by each recorded operation name."""
        operations = {r.operation for r in self.results}
        return {op: self.get_stats(op) for op in operations}

    def print_report(
        self, show_individual: bool = False, min_duration_ms: float = 0.0
    ) -> None:
        """Print a performance report to stdout.

        Args:
            show_individual: Also list every individual result.
            min_duration_ms: When listing individual results, skip those
                shorter than this many milliseconds.
        """
        if not self.results:
            print("No timing results recorded.")
            return

        print(f"\n{'=' * 60}")
        print(f"PERFORMANCE REPORT: {self.name}")
        print(f"{'=' * 60}")

        # Overall stats
        overall_stats = self.get_stats()
        overall_total = overall_stats["total"]
        print("\nOVERALL STATISTICS:")
        print(f" Total operations: {overall_stats['count']}")
        print(f" Total time: {overall_total * 1000:.2f}ms")
        print(f" Average: {overall_stats['mean'] * 1000:.2f}ms")
        print(f" Median: {overall_stats['median'] * 1000:.2f}ms")
        print(f" Min: {overall_stats['min'] * 1000:.2f}ms")
        print(f" Max: {overall_stats['max'] * 1000:.2f}ms")

        # Per-operation breakdown, most expensive operation first
        breakdown = self.get_operation_breakdown()
        print("\nPER-OPERATION BREAKDOWN:")

        for operation, stats in sorted(
            breakdown.items(), key=lambda x: x[1]["total"], reverse=True
        ):
            # Guard the share computation: every duration can be 0.0 on a
            # coarse clock, which would otherwise raise ZeroDivisionError.
            share = stats["total"] / overall_total * 100 if overall_total else 0.0

            print(f"\n {operation}:")
            print(f" Count: {stats['count']}")
            print(f" Total: {stats['total'] * 1000:.2f}ms ({share:.1f}%)")
            print(f" Average: {stats['mean'] * 1000:.2f}ms")
            print(
                f" Min/Max: {stats['min'] * 1000:.2f}ms / {stats['max'] * 1000:.2f}ms"
            )
            if stats["count"] > 1:
                print(f" StdDev: {stats['std_dev'] * 1000:.2f}ms")

        # Individual results if requested
        if show_individual:
            print("\nINDIVIDUAL RESULTS:")
            for result in self.results:
                if result.duration_ms >= min_duration_ms:
                    print(f" {result.operation}: {result.duration_ms:.2f}ms")
                    if result.metadata:
                        print(f" Metadata: {result.metadata}")

    def save_results(self, file_path: Path) -> None:
        """Write all results plus the per-operation statistics to a JSON file.

        Metadata values must be JSON-serialisable; ``json.dump`` raises
        ``TypeError`` otherwise.
        """
        data = {
            "profiler_name": self.name,
            "timestamp": time.time(),
            "results": [
                {
                    "operation": r.operation,
                    "duration": r.duration,
                    "timestamp": r.timestamp,
                    "metadata": r.metadata,
                }
                for r in self.results
            ],
            "stats": self.get_operation_breakdown(),
        }

        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def clear(self) -> None:
        """Discard all results, active timers and the nesting depth."""
        self.results.clear()
        self._active_timers.clear()
        self._nested_level = 0
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# Global profiler instance
|
|
234
|
+
_global_profiler = PerformanceProfiler("global")  # module-wide instance used by the helper functions in this module
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def time_function(
|
|
238
|
+
operation_name: str | None = None, metadata: dict[str, Any] | None = None
|
|
239
|
+
):
|
|
240
|
+
"""Decorator for timing function execution."""
|
|
241
|
+
|
|
242
|
+
def decorator(func: Callable) -> Callable:
|
|
243
|
+
name = operation_name or f"{func.__module__}.{func.__name__}"
|
|
244
|
+
|
|
245
|
+
if asyncio.iscoroutinefunction(func):
|
|
246
|
+
|
|
247
|
+
async def async_wrapper(*args, **kwargs):
|
|
248
|
+
async with _global_profiler.time_async_operation(name, metadata):
|
|
249
|
+
return await func(*args, **kwargs)
|
|
250
|
+
|
|
251
|
+
return async_wrapper
|
|
252
|
+
else:
|
|
253
|
+
|
|
254
|
+
def sync_wrapper(*args, **kwargs):
|
|
255
|
+
with _global_profiler.time_operation(name, metadata):
|
|
256
|
+
return func(*args, **kwargs)
|
|
257
|
+
|
|
258
|
+
return sync_wrapper
|
|
259
|
+
|
|
260
|
+
return decorator
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@contextmanager
def time_block(operation: str, metadata: dict[str, Any] | None = None):
    """Time the enclosed block and record it on the global profiler.

    Thin wrapper that delegates to the global profiler's ``time_operation``.
    """
    timer = _global_profiler.time_operation(operation, metadata)
    with timer:
        yield
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@asynccontextmanager
async def time_async_block(operation: str, metadata: dict[str, Any] | None = None):
    """Time the enclosed async block and record it on the global profiler.

    Thin wrapper that delegates to the global profiler's
    ``time_async_operation``.
    """
    timer = _global_profiler.time_async_operation(operation, metadata)
    async with timer:
        yield
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def get_global_profiler() -> PerformanceProfiler:
    """Return the module-wide profiler used by the global timing helpers."""
    profiler = _global_profiler
    return profiler
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def print_global_report(**kwargs):
    """Print the global profiler's report.

    Keyword arguments are forwarded unchanged to its ``print_report`` method.
    """
    report = _global_profiler.print_report
    report(**kwargs)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def clear_global_profiler():
    """Reset the global profiler by calling its ``clear`` method."""
    reset = _global_profiler.clear
    reset()
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
class SearchProfiler(PerformanceProfiler):
    """Profiler preconfigured for timing search calls."""

    def __init__(self):
        """Create the profiler under the fixed name ``search_profiler``."""
        super().__init__("search_profiler")

    async def profile_search(
        self, search_func: Callable, query: str, **search_kwargs
    ) -> tuple[Any, dict[str, float]]:
        """Run ``search_func`` under timing and report a per-phase breakdown.

        Args:
            search_func: Awaitable callable invoked as
                ``search_func(query, **search_kwargs)``.
            query: Query passed to ``search_func`` and stored as metadata.
            **search_kwargs: Extra keyword arguments forwarded to
                ``search_func``.

        Returns:
            A ``(result, breakdown)`` pair, where ``breakdown`` maps each
            phase name to its mean duration in milliseconds over everything
            this profiler has recorded for that phase.
        """
        total_metadata = {"query": query, "kwargs": search_kwargs}

        async with self.time_async_operation("total_search", total_metadata):
            # Phase 1: the search call itself.
            async with self.time_async_operation("search_execution", {"query": query}):
                result = await search_func(query, **search_kwargs)

            # Phase 2: placeholder for post-processing; it only yields to the
            # event loop, so this phase measures almost nothing today.
            result_count = len(result) if hasattr(result, "__len__") else 0
            async with self.time_async_operation(
                "result_processing", {"result_count": result_count}
            ):
                await asyncio.sleep(0)

        # Collect the mean of every phase that has recorded data.
        timing_breakdown: dict[str, float] = {}
        for phase in ("total_search", "search_execution", "result_processing"):
            phase_stats = self.get_stats(phase)
            if phase_stats:
                timing_breakdown[phase] = phase_stats["mean"] * 1000  # s -> ms

        return result, timing_breakdown
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# Convenience function for quick search profiling
|
|
329
|
+
async def profile_search_operation(
    search_func: Callable, query: str, **kwargs
) -> tuple[Any, dict[str, float]]:
    """Profile one search call with a fresh ``SearchProfiler``.

    Returns whatever ``SearchProfiler.profile_search`` returns for the call.
    """
    one_shot = SearchProfiler()
    outcome = await one_shot.profile_search(search_func, query, **kwargs)
    return outcome
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Version utilities for MCP Vector Search.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for accessing and formatting version information.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from .. import __author__, __build__, __email__, __version__
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_version_info() -> dict[str, Any]:
    """Collect the package's version details in one mapping.

    Returns:
        Dictionary with the version, build, author, email, package name and
        a combined ``version_string`` entry.
    """
    combined = f"{__version__} (build {__build__})"
    return dict(
        version=__version__,
        build=__build__,
        author=__author__,
        email=__email__,
        package="mcp-vector-search",
        version_string=combined,
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_version_string(include_build: bool = True) -> str:
    """Format the package version for display.

    Args:
        include_build: When true, append the build number in parentheses.

    Returns:
        The bare version, or ``"<version> (build <build>)"``.
    """
    if not include_build:
        return __version__
    return f"{__version__} (build {__build__})"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_user_agent() -> str:
    """Build the User-Agent value for HTTP requests.

    Returns:
        The package name and version, joined by a slash.
    """
    user_agent = f"mcp-vector-search/{__version__}"
    return user_agent
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp-vector-search
|
|
3
|
-
Version: 0.0.3
|
|
3
|
+
Version: 0.4.11
|
|
4
4
|
Summary: CLI-first semantic code search with MCP integration
|
|
5
5
|
Project-URL: Homepage, https://github.com/bobmatnyc/mcp-vector-search
|
|
6
6
|
Project-URL: Documentation, https://mcp-vector-search.readthedocs.io
|
|
@@ -40,13 +40,15 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
40
40
|
Requires-Python: >=3.11
|
|
41
41
|
Requires-Dist: aiofiles>=23.0.0
|
|
42
42
|
Requires-Dist: chromadb>=0.5.0
|
|
43
|
+
Requires-Dist: click-didyoumean>=0.3.0
|
|
43
44
|
Requires-Dist: httpx>=0.25.0
|
|
44
45
|
Requires-Dist: loguru>=0.7.0
|
|
46
|
+
Requires-Dist: mcp>=1.12.4
|
|
45
47
|
Requires-Dist: pydantic-settings>=2.1.0
|
|
46
48
|
Requires-Dist: pydantic>=2.5.0
|
|
47
49
|
Requires-Dist: rich>=13.0.0
|
|
48
50
|
Requires-Dist: sentence-transformers>=2.2.2
|
|
49
|
-
Requires-Dist: tree-sitter-
|
|
51
|
+
Requires-Dist: tree-sitter-language-pack>=0.9.0
|
|
50
52
|
Requires-Dist: tree-sitter>=0.20.1
|
|
51
53
|
Requires-Dist: typer>=0.9.0
|
|
52
54
|
Requires-Dist: watchdog>=3.0.0
|
|
@@ -56,6 +58,10 @@ Description-Content-Type: text/markdown
|
|
|
56
58
|
|
|
57
59
|
🔍 **CLI-first semantic code search with MCP integration**
|
|
58
60
|
|
|
61
|
+
[![PyPI version](https://badge.fury.io/py/mcp-vector-search.svg)](https://badge.fury.io/py/mcp-vector-search)
|
|
62
|
+
[![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
|
|
63
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
64
|
+
|
|
59
65
|
> ⚠️ **Alpha Release (v0.0.3)**: This is an early-stage project under active development. Expect breaking changes and rough edges. Feedback and contributions are welcome!
|
|
60
66
|
|
|
61
67
|
A modern, fast, and intelligent code search tool that understands your codebase through semantic analysis and AST parsing. Built with Python, powered by ChromaDB, and designed for developer productivity.
|
|
@@ -75,24 +81,31 @@ A modern, fast, and intelligent code search tool that understands your codebase
|
|
|
75
81
|
- **Rich Output**: Syntax highlighting, similarity scores, context
|
|
76
82
|
- **Fast Performance**: Sub-second search responses, efficient indexing
|
|
77
83
|
- **Modern Architecture**: Async-first, type-safe, modular design
|
|
84
|
+
- **Semi-Automatic Reindexing**: Multiple strategies without daemon processes
|
|
78
85
|
|
|
79
86
|
### 🔧 **Technical Features**
|
|
80
|
-
- **Vector Database**: ChromaDB for
|
|
87
|
+
- **Vector Database**: ChromaDB with connection pooling for a 13.6% performance boost
|
|
81
88
|
- **Embedding Models**: Configurable sentence transformers
|
|
82
|
-
- **
|
|
89
|
+
- **Smart Reindexing**: Search-triggered, Git hooks, scheduled tasks, and manual options
|
|
83
90
|
- **Extensible Parsers**: Plugin architecture for new languages
|
|
84
91
|
- **Configuration Management**: Project-specific settings
|
|
92
|
+
- **Production Ready**: Connection pooling, auto-indexing, comprehensive error handling
|
|
85
93
|
|
|
86
94
|
## 🚀 Quick Start
|
|
87
95
|
|
|
88
96
|
### Installation
|
|
89
97
|
|
|
90
98
|
```bash
|
|
91
|
-
# Install
|
|
99
|
+
# Install from PyPI
|
|
100
|
+
pip install mcp-vector-search
|
|
101
|
+
|
|
102
|
+
# Or with UV (recommended)
|
|
92
103
|
uv add mcp-vector-search
|
|
93
104
|
|
|
94
|
-
# Or
|
|
95
|
-
|
|
105
|
+
# Or install from source
|
|
106
|
+
git clone https://github.com/bobmatnyc/mcp-vector-search.git
|
|
107
|
+
cd mcp-vector-search
|
|
108
|
+
uv sync && uv pip install -e .
|
|
96
109
|
```
|
|
97
110
|
|
|
98
111
|
### Basic Usage
|
|
@@ -109,6 +122,9 @@ mcp-vector-search search "authentication logic"
|
|
|
109
122
|
mcp-vector-search search "database connection setup"
|
|
110
123
|
mcp-vector-search search "error handling patterns"
|
|
111
124
|
|
|
125
|
+
# Setup automatic reindexing (recommended)
|
|
126
|
+
mcp-vector-search auto-index setup --method all
|
|
127
|
+
|
|
112
128
|
# Check project status
|
|
113
129
|
mcp-vector-search status
|
|
114
130
|
|
|
@@ -116,6 +132,32 @@ mcp-vector-search status
|
|
|
116
132
|
mcp-vector-search watch
|
|
117
133
|
```
|
|
118
134
|
|
|
135
|
+
### Smart CLI with "Did You Mean" Suggestions
|
|
136
|
+
|
|
137
|
+
The CLI includes intelligent command suggestions for typos:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Typos are detected automatically and the closest command is suggested
|
|
141
|
+
$ mcp-vector-search serach "auth"
|
|
142
|
+
No such command 'serach'. Did you mean 'search'?
|
|
143
|
+
|
|
144
|
+
$ mcp-vector-search indx
|
|
145
|
+
No such command 'indx'. Did you mean 'index'?
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
See [docs/CLI_FEATURES.md](docs/CLI_FEATURES.md) for more details.
|
|
149
|
+
|
|
150
|
+
## Versioning & Releasing
|
|
151
|
+
|
|
152
|
+
This project uses semantic versioning with an automated release workflow.
|
|
153
|
+
|
|
154
|
+
### Quick Commands
|
|
155
|
+
- `make version-show` - Display current version
|
|
156
|
+
- `make release-patch` - Create patch release
|
|
157
|
+
- `make publish` - Publish to PyPI
|
|
158
|
+
|
|
159
|
+
See [docs/VERSIONING_WORKFLOW.md](docs/VERSIONING_WORKFLOW.md) for complete documentation.
|
|
160
|
+
|
|
119
161
|
## 📖 Documentation
|
|
120
162
|
|
|
121
163
|
### Commands
|
|
@@ -142,6 +184,18 @@ mcp-vector-search index /path/to/code
|
|
|
142
184
|
|
|
143
185
|
# Force re-indexing
|
|
144
186
|
mcp-vector-search index --force
|
|
187
|
+
|
|
188
|
+
# Reindex entire project
|
|
189
|
+
mcp-vector-search index reindex
|
|
190
|
+
|
|
191
|
+
# Reindex entire project (explicit)
|
|
192
|
+
mcp-vector-search index reindex --all
|
|
193
|
+
|
|
194
|
+
# Reindex entire project without confirmation
|
|
195
|
+
mcp-vector-search index reindex --force
|
|
196
|
+
|
|
197
|
+
# Reindex specific file
|
|
198
|
+
mcp-vector-search index reindex path/to/file.py
|
|
145
199
|
```
|
|
146
200
|
|
|
147
201
|
#### `search` - Semantic Search
|
|
@@ -159,6 +213,25 @@ mcp-vector-search search "error handling" --limit 10
|
|
|
159
213
|
mcp-vector-search search similar "path/to/function.py:25"
|
|
160
214
|
```
|
|
161
215
|
|
|
216
|
+
#### `auto-index` - Automatic Reindexing
|
|
217
|
+
```bash
|
|
218
|
+
# Setup all auto-indexing strategies
|
|
219
|
+
mcp-vector-search auto-index setup --method all
|
|
220
|
+
|
|
221
|
+
# Setup specific strategies
|
|
222
|
+
mcp-vector-search auto-index setup --method git-hooks
|
|
223
|
+
mcp-vector-search auto-index setup --method scheduled --interval 60
|
|
224
|
+
|
|
225
|
+
# Check for stale files and auto-reindex
|
|
226
|
+
mcp-vector-search auto-index check --auto-reindex --max-files 10
|
|
227
|
+
|
|
228
|
+
# View auto-indexing status
|
|
229
|
+
mcp-vector-search auto-index status
|
|
230
|
+
|
|
231
|
+
# Remove auto-indexing setup
|
|
232
|
+
mcp-vector-search auto-index teardown --method all
|
|
233
|
+
```
|
|
234
|
+
|
|
162
235
|
#### `watch` - File Watching
|
|
163
236
|
```bash
|
|
164
237
|
# Start watching for changes
|
|
@@ -194,6 +267,39 @@ mcp-vector-search config set embedding_model microsoft/codebert-base
|
|
|
194
267
|
mcp-vector-search config models
|
|
195
268
|
```
|
|
196
269
|
|
|
270
|
+
## 🚀 Performance Features
|
|
271
|
+
|
|
272
|
+
### Connection Pooling
|
|
273
|
+
Automatic connection pooling provides a **13.6% performance improvement** with zero configuration:
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
# Automatically enabled for high-throughput scenarios
|
|
277
|
+
from mcp_vector_search.core.database import PooledChromaVectorDatabase
|
|
278
|
+
|
|
279
|
+
database = PooledChromaVectorDatabase(
|
|
280
|
+
max_connections=10, # Pool size
|
|
281
|
+
min_connections=2, # Warm connections
|
|
282
|
+
max_idle_time=300.0, # 5 minutes
|
|
283
|
+
)
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Semi-Automatic Reindexing
|
|
287
|
+
Multiple strategies to keep your index up-to-date without daemon processes:
|
|
288
|
+
|
|
289
|
+
1. **Search-Triggered**: Automatically checks for stale files during searches
|
|
290
|
+
2. **Git Hooks**: Triggers reindexing after commits, merges, checkouts
|
|
291
|
+
3. **Scheduled Tasks**: System-level cron jobs or Windows tasks
|
|
292
|
+
4. **Manual Checks**: On-demand via CLI commands
|
|
293
|
+
5. **Periodic Checker**: In-process periodic checks for long-running apps
|
|
294
|
+
|
|
295
|
+
```bash
|
|
296
|
+
# Setup all strategies
|
|
297
|
+
mcp-vector-search auto-index setup --method all
|
|
298
|
+
|
|
299
|
+
# Check status
|
|
300
|
+
mcp-vector-search auto-index status
|
|
301
|
+
```
|
|
302
|
+
|
|
197
303
|
### Configuration
|
|
198
304
|
|
|
199
305
|
Projects are configured via `.mcp-vector-search/config.json`:
|
|
@@ -316,6 +422,66 @@ Please [open an issue](https://github.com/bobmatnyc/mcp-vector-search/issues) or
|
|
|
316
422
|
- [ ] Team collaboration features
|
|
317
423
|
- [ ] Production-ready performance
|
|
318
424
|
|
|
425
|
+
## 🛠️ Development
|
|
426
|
+
|
|
427
|
+
### Three-Stage Development Workflow
|
|
428
|
+
|
|
429
|
+
**Stage A: Local Development & Testing**
|
|
430
|
+
```bash
|
|
431
|
+
# Setup development environment
|
|
432
|
+
uv sync && uv pip install -e .
|
|
433
|
+
|
|
434
|
+
# Run development tests
|
|
435
|
+
./scripts/dev-test.sh
|
|
436
|
+
|
|
437
|
+
# Test CLI locally
|
|
438
|
+
uv run mcp-vector-search version
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
**Stage B: Local Deployment Testing**
|
|
442
|
+
```bash
|
|
443
|
+
# Build and test clean deployment
|
|
444
|
+
./scripts/deploy-test.sh
|
|
445
|
+
|
|
446
|
+
# Test on other projects
|
|
447
|
+
cd ~/other-project
|
|
448
|
+
mcp-vector-search init && mcp-vector-search index
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
**Stage C: PyPI Publication**
|
|
452
|
+
```bash
|
|
453
|
+
# Publish to PyPI
|
|
454
|
+
./scripts/publish.sh
|
|
455
|
+
|
|
456
|
+
# Verify published version
|
|
457
|
+
pip install mcp-vector-search --upgrade
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
### Quick Reference
|
|
461
|
+
```bash
|
|
462
|
+
./scripts/workflow.sh # Show workflow overview
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
See [DEVELOPMENT.md](DEVELOPMENT.md) for detailed development instructions.
|
|
466
|
+
|
|
467
|
+
## 📚 Documentation
|
|
468
|
+
|
|
469
|
+
For comprehensive documentation, see **[CLAUDE.md](CLAUDE.md)** - the main documentation index.
|
|
470
|
+
|
|
471
|
+
### Quick Links
|
|
472
|
+
- **[Installation & Deployment](docs/DEPLOY.md)** - Setup and deployment guide
|
|
473
|
+
- **[Project Structure](docs/STRUCTURE.md)** - Architecture and file organization
|
|
474
|
+
- **[Contributing Guidelines](docs/developer/CONTRIBUTING.md)** - How to contribute
|
|
475
|
+
- **[API Reference](docs/developer/API.md)** - Internal API documentation
|
|
476
|
+
- **[Testing Guide](docs/developer/TESTING.md)** - Testing strategies
|
|
477
|
+
- **[Code Quality](docs/developer/LINTING.md)** - Linting and formatting
|
|
478
|
+
- **[Versioning](docs/VERSIONING.md)** - Version management
|
|
479
|
+
- **[Releases](docs/RELEASES.md)** - Release process
|
|
480
|
+
|
|
481
|
+
## 🤝 Contributing
|
|
482
|
+
|
|
483
|
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
|
484
|
+
|
|
319
485
|
## 📄 License
|
|
320
486
|
|
|
321
487
|
MIT License - see [LICENSE](LICENSE) file for details.
|