mcp-vector-search: mcp_vector_search-0.0.3-py3-none-any.whl → mcp_vector_search-0.4.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +3 -2
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/config.py +88 -40
- mcp_vector_search/cli/commands/index.py +198 -52
- mcp_vector_search/cli/commands/init.py +472 -58
- mcp_vector_search/cli/commands/install.py +284 -0
- mcp_vector_search/cli/commands/mcp.py +495 -0
- mcp_vector_search/cli/commands/search.py +241 -87
- mcp_vector_search/cli/commands/status.py +184 -58
- mcp_vector_search/cli/commands/watch.py +34 -35
- mcp_vector_search/cli/didyoumean.py +184 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +292 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +163 -26
- mcp_vector_search/cli/output.py +63 -45
- mcp_vector_search/config/defaults.py +50 -36
- mcp_vector_search/config/settings.py +49 -35
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/connection_pool.py +322 -0
- mcp_vector_search/core/database.py +335 -25
- mcp_vector_search/core/embeddings.py +73 -29
- mcp_vector_search/core/exceptions.py +19 -2
- mcp_vector_search/core/factory.py +310 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +237 -73
- mcp_vector_search/core/models.py +21 -19
- mcp_vector_search/core/project.py +73 -58
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +574 -86
- mcp_vector_search/core/watcher.py +48 -46
- mcp_vector_search/mcp/__init__.py +4 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +701 -0
- mcp_vector_search/parsers/base.py +30 -31
- mcp_vector_search/parsers/javascript.py +74 -48
- mcp_vector_search/parsers/python.py +57 -49
- mcp_vector_search/parsers/registry.py +47 -32
- mcp_vector_search/parsers/text.py +179 -0
- mcp_vector_search/utils/__init__.py +40 -0
- mcp_vector_search/utils/gitignore.py +229 -0
- mcp_vector_search/utils/timing.py +334 -0
- mcp_vector_search/utils/version.py +47 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
- mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,19 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import List, Optional
|
|
6
5
|
|
|
7
6
|
import typer
|
|
8
7
|
from loguru import logger
|
|
9
8
|
|
|
10
9
|
from ...config.defaults import get_default_cache_path
|
|
11
10
|
from ...core.database import ChromaVectorDatabase
|
|
12
|
-
from ...core.embeddings import create_embedding_function
|
|
11
|
+
from ...core.embeddings import create_embedding_function
|
|
13
12
|
from ...core.exceptions import ProjectNotFoundError
|
|
14
13
|
from ...core.indexer import SemanticIndexer
|
|
15
14
|
from ...core.project import ProjectManager
|
|
16
15
|
from ..output import (
|
|
17
|
-
console,
|
|
18
16
|
create_progress,
|
|
19
17
|
print_error,
|
|
20
18
|
print_index_stats,
|
|
@@ -40,7 +38,7 @@ def main(
|
|
|
40
38
|
"--incremental/--full",
|
|
41
39
|
help="Use incremental indexing (skip unchanged files)",
|
|
42
40
|
),
|
|
43
|
-
extensions:
|
|
41
|
+
extensions: str | None = typer.Option(
|
|
44
42
|
None,
|
|
45
43
|
"--extensions",
|
|
46
44
|
"-e",
|
|
@@ -62,10 +60,10 @@ def main(
|
|
|
62
60
|
),
|
|
63
61
|
) -> None:
|
|
64
62
|
"""Index your codebase for semantic search.
|
|
65
|
-
|
|
63
|
+
|
|
66
64
|
This command parses your code files using Tree-sitter, generates embeddings
|
|
67
65
|
using the configured model, and stores them in ChromaDB for fast semantic search.
|
|
68
|
-
|
|
66
|
+
|
|
69
67
|
Examples:
|
|
70
68
|
mcp-vector-search index
|
|
71
69
|
mcp-vector-search index --force --extensions .py,.js
|
|
@@ -73,18 +71,20 @@ def main(
|
|
|
73
71
|
"""
|
|
74
72
|
try:
|
|
75
73
|
project_root = ctx.obj.get("project_root") or Path.cwd()
|
|
76
|
-
|
|
74
|
+
|
|
77
75
|
# Run async indexing
|
|
78
|
-
asyncio.run(
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
76
|
+
asyncio.run(
|
|
77
|
+
run_indexing(
|
|
78
|
+
project_root=project_root,
|
|
79
|
+
watch=watch,
|
|
80
|
+
incremental=incremental,
|
|
81
|
+
extensions=extensions,
|
|
82
|
+
force_reindex=force,
|
|
83
|
+
batch_size=batch_size,
|
|
84
|
+
show_progress=True,
|
|
85
|
+
)
|
|
86
|
+
)
|
|
87
|
+
|
|
88
88
|
except KeyboardInterrupt:
|
|
89
89
|
print_info("Indexing interrupted by user")
|
|
90
90
|
raise typer.Exit(0)
|
|
@@ -98,7 +98,7 @@ async def run_indexing(
|
|
|
98
98
|
project_root: Path,
|
|
99
99
|
watch: bool = False,
|
|
100
100
|
incremental: bool = True,
|
|
101
|
-
extensions:
|
|
101
|
+
extensions: str | None = None,
|
|
102
102
|
force_reindex: bool = False,
|
|
103
103
|
batch_size: int = 32,
|
|
104
104
|
show_progress: bool = True,
|
|
@@ -106,52 +106,56 @@ async def run_indexing(
|
|
|
106
106
|
"""Run the indexing process."""
|
|
107
107
|
# Load project configuration
|
|
108
108
|
project_manager = ProjectManager(project_root)
|
|
109
|
-
|
|
109
|
+
|
|
110
110
|
if not project_manager.is_initialized():
|
|
111
111
|
raise ProjectNotFoundError(
|
|
112
112
|
f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
|
|
113
113
|
)
|
|
114
|
-
|
|
114
|
+
|
|
115
115
|
config = project_manager.load_config()
|
|
116
|
-
|
|
116
|
+
|
|
117
117
|
# Override extensions if provided
|
|
118
118
|
file_extensions = config.file_extensions
|
|
119
119
|
if extensions:
|
|
120
120
|
file_extensions = [ext.strip() for ext in extensions.split(",")]
|
|
121
|
-
file_extensions = [
|
|
122
|
-
|
|
121
|
+
file_extensions = [
|
|
122
|
+
ext if ext.startswith(".") else f".{ext}" for ext in file_extensions
|
|
123
|
+
]
|
|
124
|
+
|
|
123
125
|
print_info(f"Indexing project: {project_root}")
|
|
124
126
|
print_info(f"File extensions: {', '.join(file_extensions)}")
|
|
125
127
|
print_info(f"Embedding model: {config.embedding_model}")
|
|
126
|
-
|
|
128
|
+
|
|
127
129
|
# Setup embedding function and cache
|
|
128
|
-
cache_dir =
|
|
130
|
+
cache_dir = (
|
|
131
|
+
get_default_cache_path(project_root) if config.cache_embeddings else None
|
|
132
|
+
)
|
|
129
133
|
embedding_function, cache = create_embedding_function(
|
|
130
134
|
model_name=config.embedding_model,
|
|
131
135
|
cache_dir=cache_dir,
|
|
132
136
|
cache_size=config.max_cache_size,
|
|
133
137
|
)
|
|
134
|
-
|
|
138
|
+
|
|
135
139
|
# Setup database
|
|
136
140
|
database = ChromaVectorDatabase(
|
|
137
141
|
persist_directory=config.index_path,
|
|
138
142
|
embedding_function=embedding_function,
|
|
139
143
|
)
|
|
140
|
-
|
|
144
|
+
|
|
141
145
|
# Setup indexer
|
|
142
146
|
indexer = SemanticIndexer(
|
|
143
147
|
database=database,
|
|
144
148
|
project_root=project_root,
|
|
145
149
|
file_extensions=file_extensions,
|
|
146
150
|
)
|
|
147
|
-
|
|
151
|
+
|
|
148
152
|
try:
|
|
149
153
|
async with database:
|
|
150
154
|
if watch:
|
|
151
155
|
await _run_watch_mode(indexer, show_progress)
|
|
152
156
|
else:
|
|
153
157
|
await _run_batch_indexing(indexer, force_reindex, show_progress)
|
|
154
|
-
|
|
158
|
+
|
|
155
159
|
except Exception as e:
|
|
156
160
|
logger.error(f"Indexing error: {e}")
|
|
157
161
|
raise
|
|
@@ -166,22 +170,22 @@ async def _run_batch_indexing(
|
|
|
166
170
|
if show_progress:
|
|
167
171
|
with create_progress() as progress:
|
|
168
172
|
task = progress.add_task("Indexing files...", total=None)
|
|
169
|
-
|
|
173
|
+
|
|
170
174
|
# Start indexing
|
|
171
175
|
indexed_count = await indexer.index_project(
|
|
172
176
|
force_reindex=force_reindex,
|
|
173
177
|
show_progress=False, # We handle progress here
|
|
174
178
|
)
|
|
175
|
-
|
|
179
|
+
|
|
176
180
|
progress.update(task, completed=indexed_count, total=indexed_count)
|
|
177
181
|
else:
|
|
178
182
|
indexed_count = await indexer.index_project(
|
|
179
183
|
force_reindex=force_reindex,
|
|
180
184
|
show_progress=show_progress,
|
|
181
185
|
)
|
|
182
|
-
|
|
186
|
+
|
|
183
187
|
print_success(f"Indexed {indexed_count} files")
|
|
184
|
-
|
|
188
|
+
|
|
185
189
|
# Show statistics
|
|
186
190
|
stats = await indexer.get_indexing_stats()
|
|
187
191
|
print_index_stats(stats)
|
|
@@ -190,11 +194,11 @@ async def _run_batch_indexing(
|
|
|
190
194
|
async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None:
|
|
191
195
|
"""Run indexing in watch mode."""
|
|
192
196
|
print_info("Starting watch mode - press Ctrl+C to stop")
|
|
193
|
-
|
|
197
|
+
|
|
194
198
|
# TODO: Implement file watching with incremental updates
|
|
195
199
|
# This would use the watchdog library to monitor file changes
|
|
196
200
|
# and call indexer.reindex_file() for changed files
|
|
197
|
-
|
|
201
|
+
|
|
198
202
|
print_error("Watch mode not yet implemented")
|
|
199
203
|
raise NotImplementedError("Watch mode will be implemented in Phase 1B")
|
|
200
204
|
|
|
@@ -202,57 +206,196 @@ async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None
|
|
|
202
206
|
@index_app.command("reindex")
|
|
203
207
|
def reindex_file(
|
|
204
208
|
ctx: typer.Context,
|
|
205
|
-
file_path: Path = typer.Argument(
|
|
206
|
-
|
|
207
|
-
help="File to reindex",
|
|
209
|
+
file_path: Path | None = typer.Argument(
|
|
210
|
+
None,
|
|
211
|
+
help="File to reindex (optional - if not provided, reindexes entire project)",
|
|
208
212
|
exists=True,
|
|
209
213
|
file_okay=True,
|
|
210
214
|
dir_okay=False,
|
|
211
215
|
readable=True,
|
|
212
216
|
),
|
|
217
|
+
all: bool = typer.Option(
|
|
218
|
+
False,
|
|
219
|
+
"--all",
|
|
220
|
+
"-a",
|
|
221
|
+
help="Explicitly reindex entire project",
|
|
222
|
+
),
|
|
223
|
+
force: bool = typer.Option(
|
|
224
|
+
False,
|
|
225
|
+
"--force",
|
|
226
|
+
"-f",
|
|
227
|
+
help="Skip confirmation prompt when reindexing entire project",
|
|
228
|
+
),
|
|
213
229
|
) -> None:
|
|
214
|
-
"""Reindex
|
|
230
|
+
"""Reindex files in the project.
|
|
231
|
+
|
|
232
|
+
Can reindex a specific file or the entire project:
|
|
233
|
+
- Without arguments: reindexes entire project (with confirmation)
|
|
234
|
+
- With file path: reindexes specific file
|
|
235
|
+
- With --all flag: explicitly reindexes entire project
|
|
236
|
+
|
|
237
|
+
Examples:
|
|
238
|
+
mcp-vector-search index reindex # Reindex entire project
|
|
239
|
+
mcp-vector-search index reindex --all # Explicitly reindex entire project
|
|
240
|
+
mcp-vector-search index reindex src/main.py # Reindex specific file
|
|
241
|
+
mcp-vector-search index reindex --all --force # Reindex entire project without confirmation
|
|
242
|
+
"""
|
|
215
243
|
try:
|
|
216
244
|
project_root = ctx.obj.get("project_root") or Path.cwd()
|
|
245
|
+
|
|
246
|
+
# Determine what to reindex
|
|
247
|
+
if file_path is not None and all:
|
|
248
|
+
print_error("Cannot specify both a file path and --all flag")
|
|
249
|
+
raise typer.Exit(1)
|
|
217
250
|
|
|
218
|
-
|
|
219
|
-
|
|
251
|
+
if file_path is not None:
|
|
252
|
+
# Reindex specific file
|
|
253
|
+
asyncio.run(_reindex_single_file(project_root, file_path))
|
|
254
|
+
else:
|
|
255
|
+
# Reindex entire project
|
|
256
|
+
if not force and not all:
|
|
257
|
+
from ..output import confirm_action
|
|
258
|
+
|
|
259
|
+
if not confirm_action(
|
|
260
|
+
"This will reindex the entire project. Continue?",
|
|
261
|
+
default=False
|
|
262
|
+
):
|
|
263
|
+
print_info("Reindex operation cancelled")
|
|
264
|
+
raise typer.Exit(0)
|
|
265
|
+
|
|
266
|
+
# Use the full project reindexing
|
|
267
|
+
asyncio.run(_reindex_entire_project(project_root))
|
|
268
|
+
|
|
269
|
+
except typer.Exit:
|
|
270
|
+
# Re-raise Exit exceptions without logging as errors
|
|
271
|
+
raise
|
|
220
272
|
except Exception as e:
|
|
221
273
|
logger.error(f"Reindexing failed: {e}")
|
|
222
274
|
print_error(f"Reindexing failed: {e}")
|
|
223
275
|
raise typer.Exit(1)
|
|
224
276
|
|
|
225
277
|
|
|
226
|
-
async def
|
|
227
|
-
"""Reindex
|
|
278
|
+
async def _reindex_entire_project(project_root: Path) -> None:
|
|
279
|
+
"""Reindex the entire project."""
|
|
280
|
+
print_info("Starting full project reindex...")
|
|
281
|
+
|
|
228
282
|
# Load project configuration
|
|
229
283
|
project_manager = ProjectManager(project_root)
|
|
284
|
+
|
|
285
|
+
if not project_manager.is_initialized():
|
|
286
|
+
raise ProjectNotFoundError(
|
|
287
|
+
f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
|
|
288
|
+
)
|
|
289
|
+
|
|
230
290
|
config = project_manager.load_config()
|
|
231
291
|
|
|
232
|
-
|
|
292
|
+
print_info(f"Project: {project_root}")
|
|
293
|
+
print_info(f"File extensions: {', '.join(config.file_extensions)}")
|
|
294
|
+
print_info(f"Embedding model: {config.embedding_model}")
|
|
295
|
+
|
|
296
|
+
# Setup embedding function and cache
|
|
297
|
+
cache_dir = (
|
|
298
|
+
get_default_cache_path(project_root) if config.cache_embeddings else None
|
|
299
|
+
)
|
|
233
300
|
embedding_function, cache = create_embedding_function(
|
|
234
301
|
model_name=config.embedding_model,
|
|
235
|
-
cache_dir=
|
|
302
|
+
cache_dir=cache_dir,
|
|
303
|
+
cache_size=config.max_cache_size,
|
|
236
304
|
)
|
|
237
305
|
|
|
306
|
+
# Setup database
|
|
238
307
|
database = ChromaVectorDatabase(
|
|
239
308
|
persist_directory=config.index_path,
|
|
240
309
|
embedding_function=embedding_function,
|
|
241
310
|
)
|
|
242
311
|
|
|
312
|
+
# Setup indexer
|
|
243
313
|
indexer = SemanticIndexer(
|
|
244
314
|
database=database,
|
|
245
315
|
project_root=project_root,
|
|
246
316
|
file_extensions=config.file_extensions,
|
|
247
317
|
)
|
|
248
318
|
|
|
319
|
+
try:
|
|
320
|
+
async with database:
|
|
321
|
+
# First, clean the existing index
|
|
322
|
+
print_info("Clearing existing index...")
|
|
323
|
+
await database.reset()
|
|
324
|
+
|
|
325
|
+
# Then reindex everything with progress
|
|
326
|
+
with create_progress() as progress:
|
|
327
|
+
task = progress.add_task("Reindexing files...", total=None)
|
|
328
|
+
|
|
329
|
+
# Force reindex all files
|
|
330
|
+
indexed_count = await indexer.index_project(
|
|
331
|
+
force_reindex=True, # Force reindexing
|
|
332
|
+
show_progress=False, # We handle progress here
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
progress.update(task, completed=indexed_count, total=indexed_count)
|
|
336
|
+
|
|
337
|
+
print_success(f"Successfully reindexed {indexed_count} files")
|
|
338
|
+
|
|
339
|
+
# Show statistics
|
|
340
|
+
stats = await indexer.get_indexing_stats()
|
|
341
|
+
print_index_stats(stats)
|
|
342
|
+
|
|
343
|
+
except Exception as e:
|
|
344
|
+
logger.error(f"Full reindex error: {e}")
|
|
345
|
+
raise
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
async def _reindex_single_file(project_root: Path, file_path: Path) -> None:
|
|
349
|
+
"""Reindex a single file."""
|
|
350
|
+
# Load project configuration
|
|
351
|
+
project_manager = ProjectManager(project_root)
|
|
352
|
+
config = project_manager.load_config()
|
|
353
|
+
|
|
354
|
+
# Make file path absolute if it's not already
|
|
355
|
+
if not file_path.is_absolute():
|
|
356
|
+
file_path = file_path.resolve()
|
|
357
|
+
|
|
358
|
+
# Check if file exists
|
|
359
|
+
if not file_path.exists():
|
|
360
|
+
print_error(f"File not found: {file_path}")
|
|
361
|
+
return
|
|
362
|
+
|
|
363
|
+
# Check if file is within project root
|
|
364
|
+
try:
|
|
365
|
+
file_path.relative_to(project_root)
|
|
366
|
+
except ValueError:
|
|
367
|
+
print_error(f"File {file_path} is not within project root {project_root}")
|
|
368
|
+
return
|
|
369
|
+
|
|
370
|
+
# Setup components
|
|
371
|
+
embedding_function, cache = create_embedding_function(
|
|
372
|
+
model_name=config.embedding_model,
|
|
373
|
+
cache_dir=get_default_cache_path(project_root)
|
|
374
|
+
if config.cache_embeddings
|
|
375
|
+
else None,
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
database = ChromaVectorDatabase(
|
|
379
|
+
persist_directory=config.index_path,
|
|
380
|
+
embedding_function=embedding_function,
|
|
381
|
+
)
|
|
382
|
+
|
|
383
|
+
indexer = SemanticIndexer(
|
|
384
|
+
database=database,
|
|
385
|
+
project_root=project_root,
|
|
386
|
+
file_extensions=config.file_extensions,
|
|
387
|
+
)
|
|
388
|
+
|
|
249
389
|
async with database:
|
|
250
390
|
success = await indexer.reindex_file(file_path)
|
|
251
|
-
|
|
391
|
+
|
|
252
392
|
if success:
|
|
253
393
|
print_success(f"Reindexed: {file_path}")
|
|
254
394
|
else:
|
|
255
395
|
print_error(f"Failed to reindex: {file_path}")
|
|
396
|
+
# Check if file extension is in the list of indexable extensions
|
|
397
|
+
if file_path.suffix not in config.file_extensions:
|
|
398
|
+
print_info(f"Note: {file_path.suffix} is not in the configured file extensions: {', '.join(config.file_extensions)}")
|
|
256
399
|
|
|
257
400
|
|
|
258
401
|
@index_app.command("clean")
|
|
@@ -268,15 +411,18 @@ def clean_index(
|
|
|
268
411
|
"""Clean the search index (remove all indexed data)."""
|
|
269
412
|
try:
|
|
270
413
|
project_root = ctx.obj.get("project_root") or Path.cwd()
|
|
271
|
-
|
|
414
|
+
|
|
272
415
|
if not confirm:
|
|
273
416
|
from ..output import confirm_action
|
|
274
|
-
|
|
417
|
+
|
|
418
|
+
if not confirm_action(
|
|
419
|
+
"This will delete all indexed data. Continue?", default=False
|
|
420
|
+
):
|
|
275
421
|
print_info("Clean operation cancelled")
|
|
276
422
|
raise typer.Exit(0)
|
|
277
|
-
|
|
423
|
+
|
|
278
424
|
asyncio.run(_clean_index(project_root))
|
|
279
|
-
|
|
425
|
+
|
|
280
426
|
except Exception as e:
|
|
281
427
|
logger.error(f"Clean failed: {e}")
|
|
282
428
|
print_error(f"Clean failed: {e}")
|
|
@@ -287,14 +433,14 @@ async def _clean_index(project_root: Path) -> None:
|
|
|
287
433
|
"""Clean the search index."""
|
|
288
434
|
project_manager = ProjectManager(project_root)
|
|
289
435
|
config = project_manager.load_config()
|
|
290
|
-
|
|
436
|
+
|
|
291
437
|
# Setup database
|
|
292
438
|
embedding_function, _ = create_embedding_function(config.embedding_model)
|
|
293
439
|
database = ChromaVectorDatabase(
|
|
294
440
|
persist_directory=config.index_path,
|
|
295
441
|
embedding_function=embedding_function,
|
|
296
442
|
)
|
|
297
|
-
|
|
443
|
+
|
|
298
444
|
async with database:
|
|
299
445
|
await database.reset()
|
|
300
446
|
print_success("Index cleaned successfully")
|