mcp-vector-search 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/config.py +393 -0
  6. mcp_vector_search/cli/commands/demo.py +358 -0
  7. mcp_vector_search/cli/commands/index.py +744 -0
  8. mcp_vector_search/cli/commands/init.py +645 -0
  9. mcp_vector_search/cli/commands/install.py +675 -0
  10. mcp_vector_search/cli/commands/install_old.py +696 -0
  11. mcp_vector_search/cli/commands/mcp.py +1182 -0
  12. mcp_vector_search/cli/commands/reset.py +393 -0
  13. mcp_vector_search/cli/commands/search.py +773 -0
  14. mcp_vector_search/cli/commands/status.py +549 -0
  15. mcp_vector_search/cli/commands/uninstall.py +485 -0
  16. mcp_vector_search/cli/commands/visualize.py +1467 -0
  17. mcp_vector_search/cli/commands/watch.py +287 -0
  18. mcp_vector_search/cli/didyoumean.py +500 -0
  19. mcp_vector_search/cli/export.py +320 -0
  20. mcp_vector_search/cli/history.py +295 -0
  21. mcp_vector_search/cli/interactive.py +342 -0
  22. mcp_vector_search/cli/main.py +461 -0
  23. mcp_vector_search/cli/output.py +412 -0
  24. mcp_vector_search/cli/suggestions.py +375 -0
  25. mcp_vector_search/config/__init__.py +1 -0
  26. mcp_vector_search/config/constants.py +24 -0
  27. mcp_vector_search/config/defaults.py +200 -0
  28. mcp_vector_search/config/settings.py +134 -0
  29. mcp_vector_search/core/__init__.py +1 -0
  30. mcp_vector_search/core/auto_indexer.py +298 -0
  31. mcp_vector_search/core/connection_pool.py +360 -0
  32. mcp_vector_search/core/database.py +1214 -0
  33. mcp_vector_search/core/directory_index.py +318 -0
  34. mcp_vector_search/core/embeddings.py +294 -0
  35. mcp_vector_search/core/exceptions.py +89 -0
  36. mcp_vector_search/core/factory.py +318 -0
  37. mcp_vector_search/core/git_hooks.py +345 -0
  38. mcp_vector_search/core/indexer.py +1002 -0
  39. mcp_vector_search/core/models.py +294 -0
  40. mcp_vector_search/core/project.py +333 -0
  41. mcp_vector_search/core/scheduler.py +330 -0
  42. mcp_vector_search/core/search.py +952 -0
  43. mcp_vector_search/core/watcher.py +322 -0
  44. mcp_vector_search/mcp/__init__.py +5 -0
  45. mcp_vector_search/mcp/__main__.py +25 -0
  46. mcp_vector_search/mcp/server.py +733 -0
  47. mcp_vector_search/parsers/__init__.py +8 -0
  48. mcp_vector_search/parsers/base.py +296 -0
  49. mcp_vector_search/parsers/dart.py +605 -0
  50. mcp_vector_search/parsers/html.py +413 -0
  51. mcp_vector_search/parsers/javascript.py +643 -0
  52. mcp_vector_search/parsers/php.py +694 -0
  53. mcp_vector_search/parsers/python.py +502 -0
  54. mcp_vector_search/parsers/registry.py +223 -0
  55. mcp_vector_search/parsers/ruby.py +678 -0
  56. mcp_vector_search/parsers/text.py +186 -0
  57. mcp_vector_search/parsers/utils.py +265 -0
  58. mcp_vector_search/py.typed +1 -0
  59. mcp_vector_search/utils/__init__.py +40 -0
  60. mcp_vector_search/utils/gitignore.py +250 -0
  61. mcp_vector_search/utils/monorepo.py +277 -0
  62. mcp_vector_search/utils/timing.py +334 -0
  63. mcp_vector_search/utils/version.py +47 -0
  64. mcp_vector_search-0.12.6.dist-info/METADATA +754 -0
  65. mcp_vector_search-0.12.6.dist-info/RECORD +68 -0
  66. mcp_vector_search-0.12.6.dist-info/WHEEL +4 -0
  67. mcp_vector_search-0.12.6.dist-info/entry_points.txt +2 -0
  68. mcp_vector_search-0.12.6.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,744 @@
1
+ """Index command for MCP Vector Search CLI."""
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+
6
+ import typer
7
+ from loguru import logger
8
+
9
+ from ...config.defaults import get_default_cache_path
10
+ from ...core.database import ChromaVectorDatabase
11
+ from ...core.embeddings import create_embedding_function
12
+ from ...core.exceptions import ProjectNotFoundError
13
+ from ...core.indexer import SemanticIndexer
14
+ from ...core.project import ProjectManager
15
+ from ..output import (
16
+ print_error,
17
+ print_index_stats,
18
+ print_info,
19
+ print_next_steps,
20
+ print_success,
21
+ print_tip,
22
+ )
23
+
24
# Create index subcommand app with callback for direct usage.
# The callback registered below (`main`) performs the indexing itself when no
# subcommand is given; every `@index_app.command(...)` in this module is a subcommand.
index_app = typer.Typer(
    help="Index codebase for semantic search",
    invoke_without_command=True,
)
29
+
30
+
31
@index_app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    watch: bool = typer.Option(
        False,
        "--watch",
        "-w",
        help="Watch for file changes and update index incrementally",
        rich_help_panel="⚙️ Advanced Options",
    ),
    incremental: bool = typer.Option(
        True,
        "--incremental/--full",
        help="Use incremental indexing (skip unchanged files)",
        rich_help_panel="📊 Indexing Options",
    ),
    extensions: str | None = typer.Option(
        None,
        "--extensions",
        "-e",
        help="Override file extensions to index (comma-separated)",
        rich_help_panel="📁 Configuration",
    ),
    force: bool = typer.Option(
        False,
        "--force",
        "-f",
        help="Force reindexing of all files",
        rich_help_panel="📊 Indexing Options",
    ),
    batch_size: int = typer.Option(
        32,
        "--batch-size",
        "-b",
        help="Batch size for embedding generation",
        min=1,
        max=128,
        rich_help_panel="⚡ Performance",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Enable debug output (shows hierarchy building details)",
        rich_help_panel="🔍 Debugging",
    ),
) -> None:
    """📑 Index your codebase for semantic search.

    Parses code files, generates semantic embeddings, and stores them in ChromaDB.
    Supports incremental indexing to skip unchanged files for faster updates.

    [bold cyan]Basic Examples:[/bold cyan]

    [green]Index entire project:[/green]
    $ mcp-vector-search index

    [green]Force full reindex:[/green]
    $ mcp-vector-search index --force

    [green]Custom file extensions:[/green]
    $ mcp-vector-search index --extensions .py,.js,.ts,.md

    [bold cyan]Advanced Usage:[/bold cyan]

    [green]Watch mode (experimental):[/green]
    $ mcp-vector-search index --watch

    [green]Full reindex (no incremental):[/green]
    $ mcp-vector-search index --full

    [green]Optimize for large projects:[/green]
    $ mcp-vector-search index --batch-size 64

    [dim]💡 Tip: Use incremental indexing (default) for faster updates on subsequent runs.[/dim]
    """
    # The docstring above is rendered as the command's help text; keep it stable.

    # Subcommands (reindex, clean, watch, ...) do their own work, so the
    # callback must stay silent when one of them was selected.
    if ctx.invoked_subcommand is not None:
        return

    try:
        # ctx.obj may be unset; in that case (or when it carries no
        # project root) the current working directory is used.
        shared_state = ctx.obj if ctx.obj else {}
        root_dir = shared_state.get("project_root") or Path.cwd()

        # Build the coroutine first, then drive it to completion.
        indexing_job = run_indexing(
            project_root=root_dir,
            watch=watch,
            incremental=incremental,
            extensions=extensions,
            force_reindex=force,
            batch_size=batch_size,
            show_progress=True,
            debug=debug,
        )
        asyncio.run(indexing_job)

    except KeyboardInterrupt:
        # Ctrl+C is treated as a clean, user-requested stop.
        print_info("Indexing interrupted by user")
        raise typer.Exit(0)
    except Exception as err:
        failure_message = f"Indexing failed: {err}"
        logger.error(failure_message)
        print_error(failure_message)
        raise typer.Exit(1)
135
+
136
+
137
async def run_indexing(
    project_root: Path,
    watch: bool = False,
    incremental: bool = True,
    extensions: str | None = None,
    force_reindex: bool = False,
    batch_size: int = 32,
    show_progress: bool = True,
    debug: bool = False,
) -> None:
    """Run the indexing process for an initialized project.

    Args:
        project_root: Root directory of the project to index.
        watch: When true, dispatch to watch mode instead of batch indexing.
        incremental: When false (``--full``), every file is reindexed, exactly
            as if ``force_reindex`` had been requested.
        extensions: Optional comma-separated list of file extensions that
            overrides the configured ones for this run. A leading dot is
            added where missing; empty entries are ignored.
        force_reindex: Reindex all files even if they appear unchanged.
        batch_size: Accepted for interface compatibility; it is not consumed
            by this function.
        show_progress: Forwarded to the batch/watch runner.
        debug: Forwarded to ``SemanticIndexer``.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    # Load project configuration
    project_manager = ProjectManager(project_root)

    if not project_manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = project_manager.load_config()

    # Override extensions if provided
    if extensions:
        # Drop empty tokens (e.g. from "py,,js" or a trailing comma); otherwise
        # they would be normalized to a bare "." extension.
        stripped = (part.strip() for part in extensions.split(","))
        file_extensions = [
            ext if ext.startswith(".") else f".{ext}" for ext in stripped if ext
        ]
        if file_extensions:
            # Create a modified config copy with overridden extensions
            config = config.model_copy(update={"file_extensions": file_extensions})

    # A non-incremental ("--full") run must not skip unchanged files, which is
    # what a forced reindex does.
    force_reindex = force_reindex or not incremental

    print_info(f"Indexing project: {project_root}")
    print_info(f"File extensions: {', '.join(config.file_extensions)}")
    print_info(f"Embedding model: {config.embedding_model}")

    # Setup embedding function and cache
    cache_dir = (
        get_default_cache_path(project_root) if config.cache_embeddings else None
    )
    # The second element of the returned pair (the cache) is not needed here.
    embedding_function, _ = create_embedding_function(
        model_name=config.embedding_model,
        cache_dir=cache_dir,
        cache_size=config.max_cache_size,
    )

    # Setup database
    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    # Setup indexer
    indexer = SemanticIndexer(
        database=database,
        project_root=project_root,
        config=config,
        debug=debug,
    )

    try:
        async with database:
            if watch:
                await _run_watch_mode(indexer, show_progress)
            else:
                await _run_batch_indexing(indexer, force_reindex, show_progress)

    except Exception as e:
        # Log here for context, then let the caller decide how to exit.
        logger.error(f"Indexing error: {e}")
        raise
205
+
206
+
207
async def _run_batch_indexing(
    indexer: SemanticIndexer,
    force_reindex: bool,
    show_progress: bool,
) -> None:
    """Run batch indexing of all files.

    With ``show_progress`` enabled, the files to index are pre-scanned, indexed
    one by one under a live two-panel display (progress bar plus the most
    recently processed files), the directory index is rebuilt and saved, and
    any failures are summarized. Otherwise indexing is delegated to
    ``indexer.index_project``. In both cases index statistics and next-step
    hints are printed at the end.

    Args:
        indexer: Indexer used to discover, index and report on files.
        force_reindex: Forwarded to the indexer's file selection and indexing calls.
        show_progress: Selects the live-progress path over the plain path.
    """
    if show_progress:
        # Import enhanced progress utilities
        from rich.layout import Layout
        from rich.live import Live
        from rich.panel import Panel
        from rich.progress import (
            BarColumn,
            Progress,
            SpinnerColumn,
            TextColumn,
            TimeRemainingColumn,
        )
        from rich.table import Table

        from ..output import console

        # Pre-scan to get total file count
        console.print("[dim]Scanning for indexable files...[/dim]")
        # NOTE(review): `indexable_files` is never read in this function; only
        # `files_to_index` is used below.
        indexable_files, files_to_index = await indexer.get_files_to_index(
            force_reindex=force_reindex
        )
        total_files = len(files_to_index)

        if total_files == 0:
            console.print("[yellow]No files need indexing[/yellow]")
            indexed_count = 0
        else:
            console.print(f"[dim]Found {total_files} files to index[/dim]\n")

            # Track recently indexed files for display
            recent_files = []
            current_file_name = ""
            indexed_count = 0
            failed_count = 0

            # Create layout for two-panel display
            layout = Layout()
            layout.split_column(
                Layout(name="progress", size=4),
                Layout(name="samples", size=7),
            )

            # Create progress bar
            progress = Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                BarColumn(bar_width=40),
                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
                TextColumn("({task.completed}/{task.total} files)"),
                TimeRemainingColumn(),
                console=console,
            )

            task = progress.add_task("Indexing files...", total=total_files)

            # Create live display with both panels
            with Live(layout, console=console, refresh_per_second=4):
                # Index files with progress updates
                async for (
                    file_path,
                    chunks_added,
                    success,
                ) in indexer.index_files_with_progress(files_to_index, force_reindex):
                    # Update counts
                    if success:
                        indexed_count += 1
                    else:
                        failed_count += 1

                    # Update progress
                    progress.update(task, advance=1)

                    # Update current file name for display
                    current_file_name = file_path.name

                    # Keep last 5 files for sampling display
                    try:
                        relative_path = str(file_path.relative_to(indexer.project_root))
                    except ValueError:
                        # Not under the project root: show the path as given.
                        relative_path = str(file_path)

                    recent_files.append((relative_path, chunks_added, success))
                    if len(recent_files) > 5:
                        recent_files.pop(0)

                    # Update display layouts
                    layout["progress"].update(
                        Panel(
                            progress,
                            title="[bold]Indexing Progress[/bold]",
                            border_style="blue",
                        )
                    )

                    # Build samples panel content
                    samples_table = Table.grid(expand=True)
                    samples_table.add_column(style="dim")

                    if current_file_name:
                        samples_table.add_row(
                            f"[bold cyan]Currently processing:[/bold cyan] {current_file_name}"
                        )
                        samples_table.add_row("")

                    samples_table.add_row("[dim]Recently indexed:[/dim]")
                    # `recent_files` is already capped at 5 entries above, so
                    # this slice is the whole list.
                    for rel_path, chunk_count, file_success in recent_files[-5:]:
                        icon = "✓" if file_success else "✗"
                        style = "green" if file_success else "red"
                        chunk_info = (
                            f"({chunk_count} chunks)"
                            if chunk_count > 0
                            else "(no chunks)"
                        )
                        samples_table.add_row(
                            f" [{style}]{icon}[/{style}] [cyan]{rel_path}[/cyan] [dim]{chunk_info}[/dim]"
                        )

                    layout["samples"].update(
                        Panel(
                            samples_table,
                            title="[bold]File Processing[/bold]",
                            border_style="dim",
                        )
                    )

            # Rebuild directory index after indexing completes
            # NOTE(review): the rebuild is fed `files_to_index` only (the files
            # selected for this run), not `indexable_files` - confirm that this
            # is what `rebuild_from_files` expects on incremental runs.
            try:
                import os

                chunk_stats = {}
                for file_path in files_to_index:
                    try:
                        mtime = os.path.getmtime(file_path)
                        chunk_stats[str(file_path)] = {
                            "modified": mtime,
                            "chunks": 1,  # Placeholder - real counts are in database
                        }
                    except OSError:
                        # File is not stat-able (e.g. it vanished); leave it
                        # out of the stats.
                        pass

                indexer.directory_index.rebuild_from_files(
                    files_to_index, indexer.project_root, chunk_stats=chunk_stats
                )
                indexer.directory_index.save()
            except Exception as e:
                # Best effort: a directory-index failure is logged but does not
                # abort the run.
                logger.error(f"Failed to update directory index: {e}")

            # Final progress summary
            console.print()
            if failed_count > 0:
                console.print(
                    f"[yellow]⚠ {failed_count} files failed to index[/yellow]"
                )
                error_log_path = (
                    indexer.project_root / ".mcp-vector-search" / "indexing_errors.log"
                )
                if error_log_path.exists():
                    # Prune log to keep only last 1000 errors
                    _prune_error_log(error_log_path, max_lines=1000)
                    console.print(f"[dim] → See details in: {error_log_path}[/dim]")
    else:
        # Non-progress mode (fallback to original behavior)
        # `show_progress` is always falsy on this branch.
        indexed_count = await indexer.index_project(
            force_reindex=force_reindex,
            show_progress=show_progress,
        )

    # Show statistics
    stats = await indexer.get_indexing_stats()

    # Display success message with chunk count for clarity
    total_chunks = stats.get("total_chunks", 0)
    print_success(
        f"Processed {indexed_count} files ({total_chunks} searchable chunks created)"
    )

    print_index_stats(stats)

    # Add next-step hints
    if indexed_count > 0:
        steps = [
            "[cyan]mcp-vector-search search 'your query'[/cyan] - Try semantic search",
            "[cyan]mcp-vector-search status[/cyan] - View detailed statistics",
        ]
        print_next_steps(steps, title="Ready to Search")
    else:
        print_info("\n[bold]No files were indexed. Possible reasons:[/bold]")
        print_info(" • No matching files found for configured extensions")
        print_info(" • All files already indexed (use --force to reindex)")
        print_tip(
            "Check configured extensions with [cyan]mcp-vector-search status[/cyan]"
        )
405
+
406
+
407
async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None:
    """Run indexing in watch mode.

    Watch mode is a stub: it prints a start message and an error, then raises.
    Neither ``indexer`` nor ``show_progress`` is used yet.

    Raises:
        NotImplementedError: Always.
    """
    print_info("Starting watch mode - press Ctrl+C to stop")

    # TODO: Implement file watching with incremental updates
    # This would use the watchdog library to monitor file changes
    # and call indexer.reindex_file() for changed files

    print_error("Watch mode not yet implemented")
    raise NotImplementedError("Watch mode will be implemented in Phase 1B")
417
+
418
+
419
@index_app.command("reindex")
def reindex_file(
    ctx: typer.Context,
    file_path: Path | None = typer.Argument(
        None,
        help="File to reindex (optional - if not provided, reindexes entire project)",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
    ),
    all: bool = typer.Option(
        False,
        "--all",
        "-a",
        help="Explicitly reindex entire project",
    ),
    force: bool = typer.Option(
        False,
        "--force",
        "-f",
        help="Skip confirmation prompt when reindexing entire project",
    ),
) -> None:
    """Reindex files in the project.

    Can reindex a specific file or the entire project:
    - Without arguments: reindexes entire project (with confirmation)
    - With file path: reindexes specific file
    - With --all flag: explicitly reindexes entire project

    Examples:
    mcp-vector-search index reindex # Reindex entire project
    mcp-vector-search index reindex --all # Explicitly reindex entire project
    mcp-vector-search index reindex src/main.py # Reindex specific file
    mcp-vector-search index reindex --all --force # Reindex entire project without confirmation
    """
    # The docstring above is rendered as the command's help text.
    try:
        # ctx.obj may be unset; guard it the same way the index callback does
        # instead of failing with AttributeError on None.
        project_root = (ctx.obj.get("project_root") if ctx.obj else None) or Path.cwd()

        # Determine what to reindex
        if file_path is not None and all:
            print_error("Cannot specify both a file path and --all flag")
            raise typer.Exit(1)

        if file_path is not None:
            # Reindex specific file
            asyncio.run(_reindex_single_file(project_root, file_path))
        else:
            # Reindex entire project; only the bare invocation (neither --all
            # nor --force) asks for confirmation.
            if not force and not all:
                from ..output import confirm_action

                if not confirm_action(
                    "This will reindex the entire project. Continue?", default=False
                ):
                    print_info("Reindex operation cancelled")
                    raise typer.Exit(0)

            # Use the full project reindexing
            asyncio.run(_reindex_entire_project(project_root))

    except typer.Exit:
        # Re-raise Exit exceptions without logging as errors
        raise
    except Exception as e:
        logger.error(f"Reindexing failed: {e}")
        print_error(f"Reindexing failed: {e}")
        raise typer.Exit(1) from e
488
+
489
+
490
async def _reindex_entire_project(project_root: Path) -> None:
    """Wipe the existing index and rebuild it for the whole project.

    Args:
        project_root: Root directory of the project to reindex.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    print_info("Starting full project reindex...")

    # Refuse to continue on a project that was never initialized.
    manager = ProjectManager(project_root)
    initialized = manager.is_initialized()
    if not initialized:
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    settings = manager.load_config()

    print_info(f"Project: {project_root}")
    print_info(f"File extensions: {', '.join(settings.file_extensions)}")
    print_info(f"Embedding model: {settings.embedding_model}")

    # The on-disk embedding cache is only used when the config enables it.
    if settings.cache_embeddings:
        embeddings_cache_dir = get_default_cache_path(project_root)
    else:
        embeddings_cache_dir = None

    embedder, _embedding_cache = create_embedding_function(
        model_name=settings.embedding_model,
        cache_dir=embeddings_cache_dir,
        cache_size=settings.max_cache_size,
    )

    vector_db = ChromaVectorDatabase(
        persist_directory=settings.index_path,
        embedding_function=embedder,
    )
    project_indexer = SemanticIndexer(
        database=vector_db,
        project_root=project_root,
        config=settings,
    )

    try:
        async with vector_db:
            # Start from an empty index ...
            print_info("Clearing existing index...")
            await vector_db.reset()

            # ... then index every file, with the live progress display.
            await _run_batch_indexing(
                project_indexer, force_reindex=True, show_progress=True
            )
    except Exception as err:
        logger.error(f"Full reindex error: {err}")
        raise
543
+
544
+
545
async def _reindex_single_file(project_root: Path, file_path: Path) -> None:
    """Reindex a single file.

    Prints an error and returns without indexing when the file does not exist
    or lies outside the project root.

    Args:
        project_root: Root directory of the project the file belongs to.
        file_path: File to reindex; a relative path is resolved first.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    # Load project configuration (same initialization guard as the other
    # indexing entry points in this module).
    project_manager = ProjectManager(project_root)

    if not project_manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = project_manager.load_config()

    # Make file path absolute if it's not already
    if not file_path.is_absolute():
        file_path = file_path.resolve()

    # Check if file exists
    if not file_path.exists():
        print_error(f"File not found: {file_path}")
        return

    # Check if file is within project root. The plain comparison fails when the
    # root is relative or reached through a symlink while the file path is
    # resolved, so the fully resolved forms are compared as well.
    inside_project = file_path.is_relative_to(project_root) or (
        file_path.resolve().is_relative_to(project_root.resolve())
    )
    if not inside_project:
        print_error(f"File {file_path} is not within project root {project_root}")
        return

    # Setup components
    cache_dir = (
        get_default_cache_path(project_root) if config.cache_embeddings else None
    )
    # The second element of the returned pair (the cache) is not needed here.
    embedding_function, _ = create_embedding_function(
        model_name=config.embedding_model,
        cache_dir=cache_dir,
        cache_size=config.max_cache_size,
    )

    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    indexer = SemanticIndexer(
        database=database,
        project_root=project_root,
        config=config,
    )

    async with database:
        success = await indexer.reindex_file(file_path)

        if success:
            print_success(f"Reindexed: {file_path}")
        else:
            print_error(f"Failed to reindex: {file_path}")
            # Check if file extension is in the list of indexable extensions
            if file_path.suffix not in config.file_extensions:
                print_info(
                    f"Note: {file_path.suffix} is not in the configured file extensions: {', '.join(config.file_extensions)}"
                )
598
+
599
+
600
@index_app.command("clean")
def clean_index(
    ctx: typer.Context,
    confirm: bool = typer.Option(
        False,
        "--yes",
        "-y",
        help="Skip confirmation prompt",
    ),
) -> None:
    """Clean the search index (remove all indexed data)."""
    # The docstring above is rendered as the command's help text.
    try:
        # ctx.obj may be unset; guard it the same way the index callback does
        # instead of failing with AttributeError on None.
        project_root = (ctx.obj.get("project_root") if ctx.obj else None) or Path.cwd()

        if not confirm:
            from ..output import confirm_action

            if not confirm_action(
                "This will delete all indexed data. Continue?", default=False
            ):
                print_info("Clean operation cancelled")
                raise typer.Exit(0)

        asyncio.run(_clean_index(project_root))

    except typer.Exit:
        # typer.Exit is an Exception subclass: without this clause a user
        # cancellation would be reported as "Clean failed" with exit code 1.
        raise
    except Exception as e:
        logger.error(f"Clean failed: {e}")
        print_error(f"Clean failed: {e}")
        raise typer.Exit(1) from e
629
+
630
+
631
async def _clean_index(project_root: Path) -> None:
    """Clean the search index.

    Loads the project configuration, opens the vector database at the
    configured index path, resets it, and prints a success message.

    Args:
        project_root: Root directory of the project whose index is reset.
    """
    project_manager = ProjectManager(project_root)
    config = project_manager.load_config()

    # Setup database
    # Only the embedding function is needed; the second returned value is discarded.
    embedding_function, _ = create_embedding_function(config.embedding_model)
    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    async with database:
        await database.reset()
        print_success("Index cleaned successfully")
646
+
647
+
648
+ # ============================================================================
649
+ # INDEX SUBCOMMANDS
650
+ # ============================================================================
651
+
652
+
653
@index_app.command("watch")
def watch_cmd(
    # NOTE(review): `Path.cwd()` is evaluated once, when this module is
    # imported, not each time the command runs.
    project_root: Path = typer.Argument(
        Path.cwd(),
        help="Project root directory to watch",
        exists=True,
        file_okay=False,
        dir_okay=True,
        readable=True,
    ),
) -> None:
    """👀 Watch for file changes and auto-update index.

    Monitors your project directory for file changes and automatically updates
    the search index when files are modified, added, or deleted.

    Examples:
    mcp-vector-search index watch
    mcp-vector-search index watch /path/to/project
    """
    # The docstring above is rendered as the command's help text.
    # Imported inside the command rather than at module import time.
    from .watch import app as watch_app

    # Import and run watch command
    # NOTE(review): `project_root` is never forwarded; `watch_app()` is called
    # with no arguments, so it presumably parses the process command line on
    # its own - confirm this reaches the intended watch command.
    watch_app()
677
+
678
+
679
# Import and register auto-index sub-app as a proper typer group.
# The import sits below the command definitions rather than at the top of the
# file, hence the E402 suppression.
from .auto_index import auto_index_app  # noqa: E402

# Mounted under the name "auto" on this command group.
index_app.add_typer(auto_index_app, name="auto", help="🔄 Manage automatic indexing")
683
+
684
+
685
@index_app.command("health")
def health_cmd(
    project_root: Path | None = typer.Option(
        None,
        "--project-root",
        "-p",
        help="Project root directory",
        exists=True,
        file_okay=False,
        dir_okay=True,
        readable=True,
    ),
    repair: bool = typer.Option(
        False,
        "--repair",
        help="Attempt to repair index issues",
    ),
) -> None:
    """🩺 Check index health and optionally repair.

    Validates the search index integrity and provides diagnostic information.
    Can attempt to repair common issues automatically.

    Examples:
    mcp-vector-search index health
    mcp-vector-search index health --repair
    """
    # The docstring above is rendered as the command's help text.
    # Thin wrapper: the implementation lives in the sibling `reset` module and
    # is imported only when the command runs.
    from .reset import health_main

    # Call the health function from reset.py
    # `project_root` may be None here; it is passed through unchanged.
    health_main(project_root=project_root, repair=repair)
716
+
717
+
718
def _prune_error_log(log_path: Path, max_lines: int = 1000) -> None:
    """Prune error log to keep only the most recent N lines.

    Pruning is best effort: any failure is logged as a warning and swallowed,
    so a problem with the log never interrupts indexing.

    Args:
        log_path: Path to the error log file
        max_lines: Maximum number of lines to keep (default: 1000). Zero or a
            negative value empties the log.
    """
    # Negative limits make no sense as a line count; treat them as "keep nothing".
    keep = max(max_lines, 0)

    try:
        with open(log_path) as f:
            lines = f.readlines()

        if len(lines) <= keep:
            # Already within the limit; leave the file untouched.
            return

        # `lines[-0:]` is the whole list, so keep == 0 needs its own case.
        pruned_lines = lines[-keep:] if keep else []

        with open(log_path, "w") as f:
            f.writelines(pruned_lines)

        logger.debug(
            f"Pruned error log from {len(lines)} to {len(pruned_lines)} lines"
        )
    except Exception as e:
        logger.warning(f"Failed to prune error log: {e}")
741
+
742
+
743
# Allow running this module directly as a script: invoke the index command group.
if __name__ == "__main__":
    index_app()