mcp_vector_search-0.15.7-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.

Potentially problematic release: this version of mcp-vector-search might be problematic.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
mcp_vector_search/cli/commands/index.py
@@ -0,0 +1,762 @@
+"""Index command for MCP Vector Search CLI."""
+
+import asyncio
+from pathlib import Path
+
+import typer
+from loguru import logger
+
+from ...config.defaults import get_default_cache_path
+from ...core.database import ChromaVectorDatabase
+from ...core.embeddings import create_embedding_function
+from ...core.exceptions import ProjectNotFoundError
+from ...core.indexer import SemanticIndexer
+from ...core.project import ProjectManager
+from ..output import (
+    print_error,
+    print_index_stats,
+    print_info,
+    print_next_steps,
+    print_success,
+    print_tip,
+)
+
+# Create index subcommand app with callback for direct usage
+index_app = typer.Typer(
+    help="Index codebase for semantic search",
+    invoke_without_command=True,
+)
+
+
+@index_app.callback(invoke_without_command=True)
+def main(
+    ctx: typer.Context,
+    watch: bool = typer.Option(
+        False,
+        "--watch",
+        "-w",
+        help="Watch for file changes and update index incrementally",
+        rich_help_panel="⚙️ Advanced Options",
+    ),
+    incremental: bool = typer.Option(
+        True,
+        "--incremental/--full",
+        help="Use incremental indexing (skip unchanged files)",
+        rich_help_panel="📊 Indexing Options",
+    ),
+    extensions: str | None = typer.Option(
+        None,
+        "--extensions",
+        "-e",
+        help="Override file extensions to index (comma-separated)",
+        rich_help_panel="📁 Configuration",
+    ),
+    force: bool = typer.Option(
+        False,
+        "--force",
+        "-f",
+        help="Force reindexing of all files",
+        rich_help_panel="📊 Indexing Options",
+    ),
+    batch_size: int = typer.Option(
+        32,
+        "--batch-size",
+        "-b",
+        help="Batch size for embedding generation",
+        min=1,
+        max=128,
+        rich_help_panel="⚡ Performance",
+    ),
+    debug: bool = typer.Option(
+        False,
+        "--debug",
+        "-d",
+        help="Enable debug output (shows hierarchy building details)",
+        rich_help_panel="🔍 Debugging",
+    ),
+) -> None:
+    """📑 Index your codebase for semantic search.
+
+    Parses code files, generates semantic embeddings, and stores them in ChromaDB.
+    Supports incremental indexing to skip unchanged files for faster updates.
+
+    [bold cyan]Basic Examples:[/bold cyan]
+
+    [green]Index entire project:[/green]
+        $ mcp-vector-search index
+
+    [green]Force full reindex:[/green]
+        $ mcp-vector-search index --force
+
+    [green]Custom file extensions:[/green]
+        $ mcp-vector-search index --extensions .py,.js,.ts,.md
+
+    [bold cyan]Advanced Usage:[/bold cyan]
+
+    [green]Watch mode (experimental):[/green]
+        $ mcp-vector-search index --watch
+
+    [green]Full reindex (no incremental):[/green]
+        $ mcp-vector-search index --full
+
+    [green]Optimize for large projects:[/green]
+        $ mcp-vector-search index --batch-size 64
+
+    [dim]💡 Tip: Use incremental indexing (default) for faster updates on subsequent runs.[/dim]
+    """
+    # If a subcommand was invoked, don't run the indexing logic
+    if ctx.invoked_subcommand is not None:
+        return
+
+    try:
+        project_root = (ctx.obj.get("project_root") if ctx.obj else None) or Path.cwd()
+
+        # Run async indexing
+        asyncio.run(
+            run_indexing(
+                project_root=project_root,
+                watch=watch,
+                incremental=incremental,
+                extensions=extensions,
+                force_reindex=force,
+                batch_size=batch_size,
+                show_progress=True,
+                debug=debug,
+            )
+        )
+
+    except KeyboardInterrupt:
+        print_info("Indexing interrupted by user")
+        raise typer.Exit(0)
+    except Exception as e:
+        logger.error(f"Indexing failed: {e}")
+        print_error(f"Indexing failed: {e}")
+        raise typer.Exit(1)
+
+
+async def run_indexing(
+    project_root: Path,
+    watch: bool = False,
+    incremental: bool = True,
+    extensions: str | None = None,
+    force_reindex: bool = False,
+    batch_size: int = 32,
+    show_progress: bool = True,
+    debug: bool = False,
+) -> None:
+    """Run the indexing process."""
+    # Load project configuration
+    project_manager = ProjectManager(project_root)
+
+    if not project_manager.is_initialized():
+        raise ProjectNotFoundError(
+            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+        )
+
+    config = project_manager.load_config()
+
+    # Override extensions if provided
+    if extensions:
+        file_extensions = [ext.strip() for ext in extensions.split(",")]
+        file_extensions = [
+            ext if ext.startswith(".") else f".{ext}" for ext in file_extensions
+        ]
+        # Create a modified config copy with overridden extensions
+        config = config.model_copy(update={"file_extensions": file_extensions})
+
+    print_info(f"Indexing project: {project_root}")
+    print_info(f"File extensions: {', '.join(config.file_extensions)}")
+    print_info(f"Embedding model: {config.embedding_model}")
+
+    # Setup embedding function and cache
+    cache_dir = (
+        get_default_cache_path(project_root) if config.cache_embeddings else None
+    )
+    embedding_function, cache = create_embedding_function(
+        model_name=config.embedding_model,
+        cache_dir=cache_dir,
+        cache_size=config.max_cache_size,
+    )
+
+    # Setup database
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+
+    # Setup indexer
+    indexer = SemanticIndexer(
+        database=database,
+        project_root=project_root,
+        config=config,
+        debug=debug,
+    )
+
+    try:
+        async with database:
+            if watch:
+                await _run_watch_mode(indexer, show_progress)
+            else:
+                await _run_batch_indexing(indexer, force_reindex, show_progress)
+
+    except Exception as e:
+        logger.error(f"Indexing error: {e}")
+        raise
+
+
+async def _run_batch_indexing(
+    indexer: SemanticIndexer,
+    force_reindex: bool,
+    show_progress: bool,
+) -> None:
+    """Run batch indexing of all files."""
+    if show_progress:
+        # Import enhanced progress utilities
+        from rich.layout import Layout
+        from rich.live import Live
+        from rich.panel import Panel
+        from rich.progress import (
+            BarColumn,
+            Progress,
+            SpinnerColumn,
+            TextColumn,
+            TimeRemainingColumn,
+        )
+        from rich.table import Table
+
+        from ..output import console
+
+        # Pre-scan to get total file count
+        console.print("[dim]Scanning for indexable files...[/dim]")
+        indexable_files, files_to_index = await indexer.get_files_to_index(
+            force_reindex=force_reindex
+        )
+        total_files = len(files_to_index)
+
+        if total_files == 0:
+            console.print("[yellow]No files need indexing[/yellow]")
+            indexed_count = 0
+        else:
+            console.print(f"[dim]Found {total_files} files to index[/dim]\n")
+
+            # Track recently indexed files for display
+            recent_files = []
+            current_file_name = ""
+            indexed_count = 0
+            failed_count = 0
+
+            # Create layout for two-panel display
+            layout = Layout()
+            layout.split_column(
+                Layout(name="progress", size=4),
+                Layout(name="samples", size=7),
+            )
+
+            # Create progress bar
+            progress = Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(bar_width=40),
+                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+                TextColumn("({task.completed}/{task.total} files)"),
+                TimeRemainingColumn(),
+                console=console,
+            )
+
+            task = progress.add_task("Indexing files...", total=total_files)
+
+            # Create live display with both panels
+            with Live(layout, console=console, refresh_per_second=4):
+                # Index files with progress updates
+                async for (
+                    file_path,
+                    chunks_added,
+                    success,
+                ) in indexer.index_files_with_progress(files_to_index, force_reindex):
+                    # Update counts
+                    if success:
+                        indexed_count += 1
+                    else:
+                        failed_count += 1
+
+                    # Update progress
+                    progress.update(task, advance=1)
+
+                    # Update current file name for display
+                    current_file_name = file_path.name
+
+                    # Keep last 5 files for sampling display
+                    try:
+                        relative_path = str(file_path.relative_to(indexer.project_root))
+                    except ValueError:
+                        relative_path = str(file_path)
+
+                    recent_files.append((relative_path, chunks_added, success))
+                    if len(recent_files) > 5:
+                        recent_files.pop(0)
+
+                    # Update display layouts
+                    layout["progress"].update(
+                        Panel(
+                            progress,
+                            title="[bold]Indexing Progress[/bold]",
+                            border_style="blue",
+                        )
+                    )
+
+                    # Build samples panel content
+                    samples_table = Table.grid(expand=True)
+                    samples_table.add_column(style="dim")
+
+                    if current_file_name:
+                        samples_table.add_row(
+                            f"[bold cyan]Currently processing:[/bold cyan] {current_file_name}"
+                        )
+                        samples_table.add_row("")
+
+                    samples_table.add_row("[dim]Recently indexed:[/dim]")
+                    for rel_path, chunk_count, file_success in recent_files[-5:]:
+                        icon = "✓" if file_success else "✗"
+                        style = "green" if file_success else "red"
+                        chunk_info = (
+                            f"({chunk_count} chunks)"
+                            if chunk_count > 0
+                            else "(no chunks)"
+                        )
+                        samples_table.add_row(
+                            f" [{style}]{icon}[/{style}] [cyan]{rel_path}[/cyan] [dim]{chunk_info}[/dim]"
+                        )
+
+                    layout["samples"].update(
+                        Panel(
+                            samples_table,
+                            title="[bold]File Processing[/bold]",
+                            border_style="dim",
+                        )
+                    )
+
+            # Rebuild directory index after indexing completes
+            try:
+                import os
+
+                chunk_stats = {}
+                for file_path in files_to_index:
+                    try:
+                        mtime = os.path.getmtime(file_path)
+                        chunk_stats[str(file_path)] = {
+                            "modified": mtime,
+                            "chunks": 1,  # Placeholder - real counts are in database
+                        }
+                    except OSError:
+                        pass
+
+                indexer.directory_index.rebuild_from_files(
+                    files_to_index, indexer.project_root, chunk_stats=chunk_stats
+                )
+                indexer.directory_index.save()
+            except Exception as e:
+                logger.error(f"Failed to update directory index: {e}")
+
+            # Final progress summary
+            console.print()
+            if failed_count > 0:
+                console.print(
+                    f"[yellow]⚠ {failed_count} files failed to index[/yellow]"
+                )
+                error_log_path = (
+                    indexer.project_root / ".mcp-vector-search" / "indexing_errors.log"
+                )
+                if error_log_path.exists():
+                    # Prune log to keep only last 1000 errors
+                    _prune_error_log(error_log_path, max_lines=1000)
+                    console.print(f"[dim] → See details in: {error_log_path}[/dim]")
+    else:
+        # Non-progress mode (fallback to original behavior)
+        indexed_count = await indexer.index_project(
+            force_reindex=force_reindex,
+            show_progress=show_progress,
+        )
+
+    # Show statistics
+    stats = await indexer.get_indexing_stats()
+
+    # Display success message with chunk count for clarity
+    total_chunks = stats.get("total_chunks", 0)
+    print_success(
+        f"Processed {indexed_count} files ({total_chunks} searchable chunks created)"
+    )
+
+    print_index_stats(stats)
+
+    # Add next-step hints
+    if indexed_count > 0:
+        # Check if LLM is configured for chat command
+        from mcp_vector_search.core.config_utils import (
+            get_openai_api_key,
+            get_openrouter_api_key,
+        )
+
+        config_dir = indexer.project_root / ".mcp-vector-search"
+        has_openai = get_openai_api_key(config_dir) is not None
+        has_openrouter = get_openrouter_api_key(config_dir) is not None
+        llm_configured = has_openai or has_openrouter
+
+        if llm_configured:
+            provider = "OpenAI" if has_openai else "OpenRouter"
+            chat_hint = f"[cyan]mcp-vector-search chat 'question'[/cyan] - Ask AI about your code [green](✓ {provider})[/green]"
+        else:
+            chat_hint = "[cyan]mcp-vector-search chat 'question'[/cyan] - Ask AI about your code [dim](requires API key)[/dim]"
+
+        steps = [
+            "[cyan]mcp-vector-search search 'your query'[/cyan] - Try semantic search",
+            chat_hint,
+            "[cyan]mcp-vector-search status[/cyan] - View detailed statistics",
+        ]
+        print_next_steps(steps, title="Ready to Search")
+    else:
+        print_info("\n[bold]No files were indexed. Possible reasons:[/bold]")
+        print_info(" • No matching files found for configured extensions")
+        print_info(" • All files already indexed (use --force to reindex)")
+        print_tip(
+            "Check configured extensions with [cyan]mcp-vector-search status[/cyan]"
+        )
+
+
+async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None:
+    """Run indexing in watch mode."""
+    print_info("Starting watch mode - press Ctrl+C to stop")
+
+    # TODO: Implement file watching with incremental updates
+    # This would use the watchdog library to monitor file changes
+    # and call indexer.reindex_file() for changed files
+
+    print_error("Watch mode not yet implemented")
+    raise NotImplementedError("Watch mode will be implemented in Phase 1B")
+
+
+@index_app.command("reindex")
+def reindex_file(
+    ctx: typer.Context,
+    file_path: Path | None = typer.Argument(
+        None,
+        help="File to reindex (optional - if not provided, reindexes entire project)",
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+        readable=True,
+    ),
+    all: bool = typer.Option(
+        False,
+        "--all",
+        "-a",
+        help="Explicitly reindex entire project",
+    ),
+    force: bool = typer.Option(
+        False,
+        "--force",
+        "-f",
+        help="Skip confirmation prompt when reindexing entire project",
+    ),
+) -> None:
+    """Reindex files in the project.
+
+    Can reindex a specific file or the entire project:
+    - Without arguments: reindexes entire project (with confirmation)
+    - With file path: reindexes specific file
+    - With --all flag: explicitly reindexes entire project
+
+    Examples:
+        mcp-vector-search index reindex                # Reindex entire project
+        mcp-vector-search index reindex --all          # Explicitly reindex entire project
+        mcp-vector-search index reindex src/main.py    # Reindex specific file
+        mcp-vector-search index reindex --all --force  # Reindex entire project without confirmation
+    """
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+
+        # Determine what to reindex
+        if file_path is not None and all:
+            print_error("Cannot specify both a file path and --all flag")
+            raise typer.Exit(1)
+
+        if file_path is not None:
+            # Reindex specific file
+            asyncio.run(_reindex_single_file(project_root, file_path))
+        else:
+            # Reindex entire project
+            if not force and not all:
+                from ..output import confirm_action
+
+                if not confirm_action(
+                    "This will reindex the entire project. Continue?", default=False
+                ):
+                    print_info("Reindex operation cancelled")
+                    raise typer.Exit(0)
+
+            # Use the full project reindexing
+            asyncio.run(_reindex_entire_project(project_root))
+
+    except typer.Exit:
+        # Re-raise Exit exceptions without logging as errors
+        raise
+    except Exception as e:
+        logger.error(f"Reindexing failed: {e}")
+        print_error(f"Reindexing failed: {e}")
+        raise typer.Exit(1)
+
+
+async def _reindex_entire_project(project_root: Path) -> None:
+    """Reindex the entire project."""
+    print_info("Starting full project reindex...")
+
+    # Load project configuration
+    project_manager = ProjectManager(project_root)
+
+    if not project_manager.is_initialized():
+        raise ProjectNotFoundError(
+            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+        )
+
+    config = project_manager.load_config()
+
+    print_info(f"Project: {project_root}")
+    print_info(f"File extensions: {', '.join(config.file_extensions)}")
+    print_info(f"Embedding model: {config.embedding_model}")
+
+    # Setup embedding function and cache
+    cache_dir = (
+        get_default_cache_path(project_root) if config.cache_embeddings else None
+    )
+    embedding_function, cache = create_embedding_function(
+        model_name=config.embedding_model,
+        cache_dir=cache_dir,
+        cache_size=config.max_cache_size,
+    )
+
+    # Setup database
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+
+    # Setup indexer
+    indexer = SemanticIndexer(
+        database=database,
+        project_root=project_root,
+        config=config,
+    )
+
+    try:
+        async with database:
+            # First, clean the existing index
+            print_info("Clearing existing index...")
+            await database.reset()
+
+            # Then reindex everything with enhanced progress display
+            await _run_batch_indexing(indexer, force_reindex=True, show_progress=True)
+
+    except Exception as e:
+        logger.error(f"Full reindex error: {e}")
+        raise
+
+
+async def _reindex_single_file(project_root: Path, file_path: Path) -> None:
+    """Reindex a single file."""
+    # Load project configuration
+    project_manager = ProjectManager(project_root)
+    config = project_manager.load_config()
+
+    # Make file path absolute if it's not already
+    if not file_path.is_absolute():
+        file_path = file_path.resolve()
+
+    # Check if file exists
+    if not file_path.exists():
+        print_error(f"File not found: {file_path}")
+        return
+
+    # Check if file is within project root
+    try:
+        file_path.relative_to(project_root)
+    except ValueError:
+        print_error(f"File {file_path} is not within project root {project_root}")
+        return
+
+    # Setup components
+    embedding_function, cache = create_embedding_function(
+        model_name=config.embedding_model,
+        cache_dir=(
+            get_default_cache_path(project_root) if config.cache_embeddings else None
+        ),
+    )
+
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+
+    indexer = SemanticIndexer(
+        database=database,
+        project_root=project_root,
+        config=config,
+    )
+
+    async with database:
+        success = await indexer.reindex_file(file_path)
+
+        if success:
+            print_success(f"Reindexed: {file_path}")
+        else:
+            print_error(f"Failed to reindex: {file_path}")
+            # Check if file extension is in the list of indexable extensions
+            if file_path.suffix not in config.file_extensions:
+                print_info(
+                    f"Note: {file_path.suffix} is not in the configured file extensions: {', '.join(config.file_extensions)}"
+                )
+
+
+@index_app.command("clean")
+def clean_index(
+    ctx: typer.Context,
+    confirm: bool = typer.Option(
+        False,
+        "--yes",
+        "-y",
+        help="Skip confirmation prompt",
+    ),
+) -> None:
+    """Clean the search index (remove all indexed data)."""
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+
+        if not confirm:
+            from ..output import confirm_action
+
+            if not confirm_action(
+                "This will delete all indexed data. Continue?", default=False
+            ):
+                print_info("Clean operation cancelled")
+                raise typer.Exit(0)
+
+        asyncio.run(_clean_index(project_root))
+
+    except Exception as e:
+        logger.error(f"Clean failed: {e}")
+        print_error(f"Clean failed: {e}")
+        raise typer.Exit(1)
+
+
+async def _clean_index(project_root: Path) -> None:
+    """Clean the search index."""
+    project_manager = ProjectManager(project_root)
+    config = project_manager.load_config()
+
+    # Setup database
+    embedding_function, _ = create_embedding_function(config.embedding_model)
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+
+    async with database:
+        await database.reset()
+        print_success("Index cleaned successfully")
+
+
+# ============================================================================
+# INDEX SUBCOMMANDS
+# ============================================================================
+
+
+@index_app.command("watch")
+def watch_cmd(
+    project_root: Path = typer.Argument(
+        Path.cwd(),
+        help="Project root directory to watch",
+        exists=True,
+        file_okay=False,
+        dir_okay=True,
+        readable=True,
+    ),
+) -> None:
+    """👀 Watch for file changes and auto-update index.
+
+    Monitors your project directory for file changes and automatically updates
+    the search index when files are modified, added, or deleted.
+
+    Examples:
+        mcp-vector-search index watch
+        mcp-vector-search index watch /path/to/project
+    """
+    from .watch import app as watch_app
+
+    # Import and run watch command
+    watch_app()
+
+
+# Import and register auto-index sub-app as a proper typer group
+from .auto_index import auto_index_app  # noqa: E402
+
+index_app.add_typer(auto_index_app, name="auto", help="🔄 Manage automatic indexing")
+
+
+@index_app.command("health")
+def health_cmd(
+    project_root: Path | None = typer.Option(
+        None,
+        "--project-root",
+        "-p",
+        help="Project root directory",
+        exists=True,
+        file_okay=False,
+        dir_okay=True,
+        readable=True,
+    ),
+    repair: bool = typer.Option(
+        False,
+        "--repair",
+        help="Attempt to repair index issues",
+    ),
+) -> None:
+    """🩺 Check index health and optionally repair.
+
+    Validates the search index integrity and provides diagnostic information.
+    Can attempt to repair common issues automatically.
+
+    Examples:
+        mcp-vector-search index health
+        mcp-vector-search index health --repair
+    """
+    from .reset import health_main
+
+    # Call the health function from reset.py
+    health_main(project_root=project_root, repair=repair)
+
+
+def _prune_error_log(log_path: Path, max_lines: int = 1000) -> None:
+    """Prune error log to keep only the most recent N lines.
+
+    Args:
+        log_path: Path to the error log file
+        max_lines: Maximum number of lines to keep (default: 1000)
+    """
+    try:
+        with open(log_path) as f:
+            lines = f.readlines()
+
+        if len(lines) > max_lines:
+            # Keep only the last max_lines lines
+            pruned_lines = lines[-max_lines:]
+
+            with open(log_path, "w") as f:
+                f.writelines(pruned_lines)
+
+            logger.debug(
+                f"Pruned error log from {len(lines)} to {len(pruned_lines)} lines"
+            )
+    except Exception as e:
+        logger.warning(f"Failed to prune error log: {e}")
+
+
+if __name__ == "__main__":
+    index_app()
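
The hunk above defines index_app as a Typer sub-application but does not show how it is wired into the package's console script. As orientation only, and not part of this diff, a root CLI module such as mcp_vector_search/cli/main.py would typically mount it along the following lines; the app object and its help text here are assumed names for illustration:

import typer

from mcp_vector_search.cli.commands.index import index_app

# Hypothetical root application; the real one ships in mcp_vector_search/cli/main.py.
app = typer.Typer(help="mcp-vector-search CLI")

# Mount the sub-app so the commands above appear as `mcp-vector-search index`,
# `mcp-vector-search index reindex`, `mcp-vector-search index clean`, and so on.
app.add_typer(index_app, name="index")

if __name__ == "__main__":
    app()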