mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mcp-vector-search has been flagged as potentially problematic. Consult the package registry's advisory page for more details before installing.

Files changed (49)
  1. mcp_vector_search/__init__.py +3 -2
  2. mcp_vector_search/cli/commands/auto_index.py +397 -0
  3. mcp_vector_search/cli/commands/config.py +88 -40
  4. mcp_vector_search/cli/commands/index.py +198 -52
  5. mcp_vector_search/cli/commands/init.py +472 -58
  6. mcp_vector_search/cli/commands/install.py +284 -0
  7. mcp_vector_search/cli/commands/mcp.py +495 -0
  8. mcp_vector_search/cli/commands/search.py +241 -87
  9. mcp_vector_search/cli/commands/status.py +184 -58
  10. mcp_vector_search/cli/commands/watch.py +34 -35
  11. mcp_vector_search/cli/didyoumean.py +184 -0
  12. mcp_vector_search/cli/export.py +320 -0
  13. mcp_vector_search/cli/history.py +292 -0
  14. mcp_vector_search/cli/interactive.py +342 -0
  15. mcp_vector_search/cli/main.py +163 -26
  16. mcp_vector_search/cli/output.py +63 -45
  17. mcp_vector_search/config/defaults.py +50 -36
  18. mcp_vector_search/config/settings.py +49 -35
  19. mcp_vector_search/core/auto_indexer.py +298 -0
  20. mcp_vector_search/core/connection_pool.py +322 -0
  21. mcp_vector_search/core/database.py +335 -25
  22. mcp_vector_search/core/embeddings.py +73 -29
  23. mcp_vector_search/core/exceptions.py +19 -2
  24. mcp_vector_search/core/factory.py +310 -0
  25. mcp_vector_search/core/git_hooks.py +345 -0
  26. mcp_vector_search/core/indexer.py +237 -73
  27. mcp_vector_search/core/models.py +21 -19
  28. mcp_vector_search/core/project.py +73 -58
  29. mcp_vector_search/core/scheduler.py +330 -0
  30. mcp_vector_search/core/search.py +574 -86
  31. mcp_vector_search/core/watcher.py +48 -46
  32. mcp_vector_search/mcp/__init__.py +4 -0
  33. mcp_vector_search/mcp/__main__.py +25 -0
  34. mcp_vector_search/mcp/server.py +701 -0
  35. mcp_vector_search/parsers/base.py +30 -31
  36. mcp_vector_search/parsers/javascript.py +74 -48
  37. mcp_vector_search/parsers/python.py +57 -49
  38. mcp_vector_search/parsers/registry.py +47 -32
  39. mcp_vector_search/parsers/text.py +179 -0
  40. mcp_vector_search/utils/__init__.py +40 -0
  41. mcp_vector_search/utils/gitignore.py +229 -0
  42. mcp_vector_search/utils/timing.py +334 -0
  43. mcp_vector_search/utils/version.py +47 -0
  44. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
  45. mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
  46. mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
  47. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
  48. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
  49. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0
@@ -2,19 +2,17 @@
2
2
 
3
3
  import asyncio
4
4
  from pathlib import Path
5
- from typing import List, Optional
6
5
 
7
6
  import typer
8
7
  from loguru import logger
9
8
 
10
9
  from ...config.defaults import get_default_cache_path
11
10
  from ...core.database import ChromaVectorDatabase
12
- from ...core.embeddings import create_embedding_function, BatchEmbeddingProcessor
11
+ from ...core.embeddings import create_embedding_function
13
12
  from ...core.exceptions import ProjectNotFoundError
14
13
  from ...core.indexer import SemanticIndexer
15
14
  from ...core.project import ProjectManager
16
15
  from ..output import (
17
- console,
18
16
  create_progress,
19
17
  print_error,
20
18
  print_index_stats,
@@ -40,7 +38,7 @@ def main(
40
38
  "--incremental/--full",
41
39
  help="Use incremental indexing (skip unchanged files)",
42
40
  ),
43
- extensions: Optional[str] = typer.Option(
41
+ extensions: str | None = typer.Option(
44
42
  None,
45
43
  "--extensions",
46
44
  "-e",
@@ -62,10 +60,10 @@ def main(
62
60
  ),
63
61
  ) -> None:
64
62
  """Index your codebase for semantic search.
65
-
63
+
66
64
  This command parses your code files using Tree-sitter, generates embeddings
67
65
  using the configured model, and stores them in ChromaDB for fast semantic search.
68
-
66
+
69
67
  Examples:
70
68
  mcp-vector-search index
71
69
  mcp-vector-search index --force --extensions .py,.js
@@ -73,18 +71,20 @@ def main(
73
71
  """
74
72
  try:
75
73
  project_root = ctx.obj.get("project_root") or Path.cwd()
76
-
74
+
77
75
  # Run async indexing
78
- asyncio.run(run_indexing(
79
- project_root=project_root,
80
- watch=watch,
81
- incremental=incremental,
82
- extensions=extensions,
83
- force_reindex=force,
84
- batch_size=batch_size,
85
- show_progress=True,
86
- ))
87
-
76
+ asyncio.run(
77
+ run_indexing(
78
+ project_root=project_root,
79
+ watch=watch,
80
+ incremental=incremental,
81
+ extensions=extensions,
82
+ force_reindex=force,
83
+ batch_size=batch_size,
84
+ show_progress=True,
85
+ )
86
+ )
87
+
88
88
  except KeyboardInterrupt:
89
89
  print_info("Indexing interrupted by user")
90
90
  raise typer.Exit(0)
@@ -98,7 +98,7 @@ async def run_indexing(
98
98
  project_root: Path,
99
99
  watch: bool = False,
100
100
  incremental: bool = True,
101
- extensions: Optional[str] = None,
101
+ extensions: str | None = None,
102
102
  force_reindex: bool = False,
103
103
  batch_size: int = 32,
104
104
  show_progress: bool = True,
@@ -106,52 +106,56 @@ async def run_indexing(
106
106
  """Run the indexing process."""
107
107
  # Load project configuration
108
108
  project_manager = ProjectManager(project_root)
109
-
109
+
110
110
  if not project_manager.is_initialized():
111
111
  raise ProjectNotFoundError(
112
112
  f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
113
113
  )
114
-
114
+
115
115
  config = project_manager.load_config()
116
-
116
+
117
117
  # Override extensions if provided
118
118
  file_extensions = config.file_extensions
119
119
  if extensions:
120
120
  file_extensions = [ext.strip() for ext in extensions.split(",")]
121
- file_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in file_extensions]
122
-
121
+ file_extensions = [
122
+ ext if ext.startswith(".") else f".{ext}" for ext in file_extensions
123
+ ]
124
+
123
125
  print_info(f"Indexing project: {project_root}")
124
126
  print_info(f"File extensions: {', '.join(file_extensions)}")
125
127
  print_info(f"Embedding model: {config.embedding_model}")
126
-
128
+
127
129
  # Setup embedding function and cache
128
- cache_dir = get_default_cache_path(project_root) if config.cache_embeddings else None
130
+ cache_dir = (
131
+ get_default_cache_path(project_root) if config.cache_embeddings else None
132
+ )
129
133
  embedding_function, cache = create_embedding_function(
130
134
  model_name=config.embedding_model,
131
135
  cache_dir=cache_dir,
132
136
  cache_size=config.max_cache_size,
133
137
  )
134
-
138
+
135
139
  # Setup database
136
140
  database = ChromaVectorDatabase(
137
141
  persist_directory=config.index_path,
138
142
  embedding_function=embedding_function,
139
143
  )
140
-
144
+
141
145
  # Setup indexer
142
146
  indexer = SemanticIndexer(
143
147
  database=database,
144
148
  project_root=project_root,
145
149
  file_extensions=file_extensions,
146
150
  )
147
-
151
+
148
152
  try:
149
153
  async with database:
150
154
  if watch:
151
155
  await _run_watch_mode(indexer, show_progress)
152
156
  else:
153
157
  await _run_batch_indexing(indexer, force_reindex, show_progress)
154
-
158
+
155
159
  except Exception as e:
156
160
  logger.error(f"Indexing error: {e}")
157
161
  raise
@@ -166,22 +170,22 @@ async def _run_batch_indexing(
166
170
  if show_progress:
167
171
  with create_progress() as progress:
168
172
  task = progress.add_task("Indexing files...", total=None)
169
-
173
+
170
174
  # Start indexing
171
175
  indexed_count = await indexer.index_project(
172
176
  force_reindex=force_reindex,
173
177
  show_progress=False, # We handle progress here
174
178
  )
175
-
179
+
176
180
  progress.update(task, completed=indexed_count, total=indexed_count)
177
181
  else:
178
182
  indexed_count = await indexer.index_project(
179
183
  force_reindex=force_reindex,
180
184
  show_progress=show_progress,
181
185
  )
182
-
186
+
183
187
  print_success(f"Indexed {indexed_count} files")
184
-
188
+
185
189
  # Show statistics
186
190
  stats = await indexer.get_indexing_stats()
187
191
  print_index_stats(stats)
@@ -190,11 +194,11 @@ async def _run_batch_indexing(
190
194
  async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None:
191
195
  """Run indexing in watch mode."""
192
196
  print_info("Starting watch mode - press Ctrl+C to stop")
193
-
197
+
194
198
  # TODO: Implement file watching with incremental updates
195
199
  # This would use the watchdog library to monitor file changes
196
200
  # and call indexer.reindex_file() for changed files
197
-
201
+
198
202
  print_error("Watch mode not yet implemented")
199
203
  raise NotImplementedError("Watch mode will be implemented in Phase 1B")
200
204
 
@@ -202,57 +206,196 @@ async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None
202
206
  @index_app.command("reindex")
203
207
  def reindex_file(
204
208
  ctx: typer.Context,
205
- file_path: Path = typer.Argument(
206
- ...,
207
- help="File to reindex",
209
+ file_path: Path | None = typer.Argument(
210
+ None,
211
+ help="File to reindex (optional - if not provided, reindexes entire project)",
208
212
  exists=True,
209
213
  file_okay=True,
210
214
  dir_okay=False,
211
215
  readable=True,
212
216
  ),
217
+ all: bool = typer.Option(
218
+ False,
219
+ "--all",
220
+ "-a",
221
+ help="Explicitly reindex entire project",
222
+ ),
223
+ force: bool = typer.Option(
224
+ False,
225
+ "--force",
226
+ "-f",
227
+ help="Skip confirmation prompt when reindexing entire project",
228
+ ),
213
229
  ) -> None:
214
- """Reindex a specific file."""
230
+ """Reindex files in the project.
231
+
232
+ Can reindex a specific file or the entire project:
233
+ - Without arguments: reindexes entire project (with confirmation)
234
+ - With file path: reindexes specific file
235
+ - With --all flag: explicitly reindexes entire project
236
+
237
+ Examples:
238
+ mcp-vector-search index reindex # Reindex entire project
239
+ mcp-vector-search index reindex --all # Explicitly reindex entire project
240
+ mcp-vector-search index reindex src/main.py # Reindex specific file
241
+ mcp-vector-search index reindex --all --force # Reindex entire project without confirmation
242
+ """
215
243
  try:
216
244
  project_root = ctx.obj.get("project_root") or Path.cwd()
245
+
246
+ # Determine what to reindex
247
+ if file_path is not None and all:
248
+ print_error("Cannot specify both a file path and --all flag")
249
+ raise typer.Exit(1)
217
250
 
218
- asyncio.run(_reindex_single_file(project_root, file_path))
219
-
251
+ if file_path is not None:
252
+ # Reindex specific file
253
+ asyncio.run(_reindex_single_file(project_root, file_path))
254
+ else:
255
+ # Reindex entire project
256
+ if not force and not all:
257
+ from ..output import confirm_action
258
+
259
+ if not confirm_action(
260
+ "This will reindex the entire project. Continue?",
261
+ default=False
262
+ ):
263
+ print_info("Reindex operation cancelled")
264
+ raise typer.Exit(0)
265
+
266
+ # Use the full project reindexing
267
+ asyncio.run(_reindex_entire_project(project_root))
268
+
269
+ except typer.Exit:
270
+ # Re-raise Exit exceptions without logging as errors
271
+ raise
220
272
  except Exception as e:
221
273
  logger.error(f"Reindexing failed: {e}")
222
274
  print_error(f"Reindexing failed: {e}")
223
275
  raise typer.Exit(1)
224
276
 
225
277
 
226
- async def _reindex_single_file(project_root: Path, file_path: Path) -> None:
227
- """Reindex a single file."""
278
+ async def _reindex_entire_project(project_root: Path) -> None:
279
+ """Reindex the entire project."""
280
+ print_info("Starting full project reindex...")
281
+
228
282
  # Load project configuration
229
283
  project_manager = ProjectManager(project_root)
284
+
285
+ if not project_manager.is_initialized():
286
+ raise ProjectNotFoundError(
287
+ f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
288
+ )
289
+
230
290
  config = project_manager.load_config()
231
291
 
232
- # Setup components
292
+ print_info(f"Project: {project_root}")
293
+ print_info(f"File extensions: {', '.join(config.file_extensions)}")
294
+ print_info(f"Embedding model: {config.embedding_model}")
295
+
296
+ # Setup embedding function and cache
297
+ cache_dir = (
298
+ get_default_cache_path(project_root) if config.cache_embeddings else None
299
+ )
233
300
  embedding_function, cache = create_embedding_function(
234
301
  model_name=config.embedding_model,
235
- cache_dir=get_default_cache_path(project_root) if config.cache_embeddings else None,
302
+ cache_dir=cache_dir,
303
+ cache_size=config.max_cache_size,
236
304
  )
237
305
 
306
+ # Setup database
238
307
  database = ChromaVectorDatabase(
239
308
  persist_directory=config.index_path,
240
309
  embedding_function=embedding_function,
241
310
  )
242
311
 
312
+ # Setup indexer
243
313
  indexer = SemanticIndexer(
244
314
  database=database,
245
315
  project_root=project_root,
246
316
  file_extensions=config.file_extensions,
247
317
  )
248
318
 
319
+ try:
320
+ async with database:
321
+ # First, clean the existing index
322
+ print_info("Clearing existing index...")
323
+ await database.reset()
324
+
325
+ # Then reindex everything with progress
326
+ with create_progress() as progress:
327
+ task = progress.add_task("Reindexing files...", total=None)
328
+
329
+ # Force reindex all files
330
+ indexed_count = await indexer.index_project(
331
+ force_reindex=True, # Force reindexing
332
+ show_progress=False, # We handle progress here
333
+ )
334
+
335
+ progress.update(task, completed=indexed_count, total=indexed_count)
336
+
337
+ print_success(f"Successfully reindexed {indexed_count} files")
338
+
339
+ # Show statistics
340
+ stats = await indexer.get_indexing_stats()
341
+ print_index_stats(stats)
342
+
343
+ except Exception as e:
344
+ logger.error(f"Full reindex error: {e}")
345
+ raise
346
+
347
+
348
+ async def _reindex_single_file(project_root: Path, file_path: Path) -> None:
349
+ """Reindex a single file."""
350
+ # Load project configuration
351
+ project_manager = ProjectManager(project_root)
352
+ config = project_manager.load_config()
353
+
354
+ # Make file path absolute if it's not already
355
+ if not file_path.is_absolute():
356
+ file_path = file_path.resolve()
357
+
358
+ # Check if file exists
359
+ if not file_path.exists():
360
+ print_error(f"File not found: {file_path}")
361
+ return
362
+
363
+ # Check if file is within project root
364
+ try:
365
+ file_path.relative_to(project_root)
366
+ except ValueError:
367
+ print_error(f"File {file_path} is not within project root {project_root}")
368
+ return
369
+
370
+ # Setup components
371
+ embedding_function, cache = create_embedding_function(
372
+ model_name=config.embedding_model,
373
+ cache_dir=get_default_cache_path(project_root)
374
+ if config.cache_embeddings
375
+ else None,
376
+ )
377
+
378
+ database = ChromaVectorDatabase(
379
+ persist_directory=config.index_path,
380
+ embedding_function=embedding_function,
381
+ )
382
+
383
+ indexer = SemanticIndexer(
384
+ database=database,
385
+ project_root=project_root,
386
+ file_extensions=config.file_extensions,
387
+ )
388
+
249
389
  async with database:
250
390
  success = await indexer.reindex_file(file_path)
251
-
391
+
252
392
  if success:
253
393
  print_success(f"Reindexed: {file_path}")
254
394
  else:
255
395
  print_error(f"Failed to reindex: {file_path}")
396
+ # Check if file extension is in the list of indexable extensions
397
+ if file_path.suffix not in config.file_extensions:
398
+ print_info(f"Note: {file_path.suffix} is not in the configured file extensions: {', '.join(config.file_extensions)}")
256
399
 
257
400
 
258
401
  @index_app.command("clean")
@@ -268,15 +411,18 @@ def clean_index(
268
411
  """Clean the search index (remove all indexed data)."""
269
412
  try:
270
413
  project_root = ctx.obj.get("project_root") or Path.cwd()
271
-
414
+
272
415
  if not confirm:
273
416
  from ..output import confirm_action
274
- if not confirm_action("This will delete all indexed data. Continue?", default=False):
417
+
418
+ if not confirm_action(
419
+ "This will delete all indexed data. Continue?", default=False
420
+ ):
275
421
  print_info("Clean operation cancelled")
276
422
  raise typer.Exit(0)
277
-
423
+
278
424
  asyncio.run(_clean_index(project_root))
279
-
425
+
280
426
  except Exception as e:
281
427
  logger.error(f"Clean failed: {e}")
282
428
  print_error(f"Clean failed: {e}")
@@ -287,14 +433,14 @@ async def _clean_index(project_root: Path) -> None:
287
433
  """Clean the search index."""
288
434
  project_manager = ProjectManager(project_root)
289
435
  config = project_manager.load_config()
290
-
436
+
291
437
  # Setup database
292
438
  embedding_function, _ = create_embedding_function(config.embedding_model)
293
439
  database = ChromaVectorDatabase(
294
440
  persist_directory=config.index_path,
295
441
  embedding_function=embedding_function,
296
442
  )
297
-
443
+
298
444
  async with database:
299
445
  await database.reset()
300
446
  print_success("Index cleaned successfully")