mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/cli/commands/index_background.py (new file)
@@ -0,0 +1,467 @@
+"""Background indexing entry point for detached process execution."""
+
+import argparse
+import asyncio
+import json
+import logging
+import os
+import signal
+import sys
+import time
+from datetime import UTC, datetime
+from pathlib import Path
+
+from loguru import logger
+
+from ...core.database import ChromaVectorDatabase
+from ...core.embeddings import create_embedding_function
+from ...core.indexer import SemanticIndexer
+from ...core.project import ProjectManager
+
+
+class BackgroundIndexer:
+    """Background indexer with progress tracking."""
+
+    def __init__(self, project_root: Path, progress_file: Path, log_file: Path):
+        """Initialize background indexer.
+
+        Args:
+            project_root: Project root directory
+            progress_file: Path to progress JSON file
+            log_file: Path to log file
+        """
+        self.project_root = project_root
+        self.progress_file = progress_file
+        self.log_file = log_file
+        self.progress_data = {
+            "pid": os.getpid(),
+            "started_at": datetime.now(UTC).isoformat(),
+            "status": "initializing",
+            "total_files": 0,
+            "processed_files": 0,
+            "current_file": None,
+            "chunks_created": 0,
+            "errors": 0,
+            "last_updated": datetime.now(UTC).isoformat(),
+            "eta_seconds": 0,
+        }
+        self.start_time = time.time()
+
+    def _write_progress(self) -> None:
+        """Write progress data to file atomically."""
+        # Atomic write: temp file + rename
+        self.progress_data["last_updated"] = datetime.now(UTC).isoformat()
+
+        temp_file = self.progress_file.with_suffix(".tmp")
+        try:
+            with open(temp_file, "w") as f:
+                json.dump(self.progress_data, f, indent=2)
+            # Atomic rename
+            temp_file.replace(self.progress_file)
+        except Exception as e:
+            logger.error(f"Failed to write progress file: {e}")
+            if temp_file.exists():
+                temp_file.unlink()
+
+    def _update_progress(
+        self,
+        status: str | None = None,
+        current_file: str | None = None,
+        processed_increment: int = 0,
+        chunks_increment: int = 0,
+        error_increment: int = 0,
+    ) -> None:
+        """Update progress data and write to file.
+
+        Args:
+            status: New status value
+            current_file: Current file being processed
+            processed_increment: Number of files processed to add
+            chunks_increment: Number of chunks created to add
+            error_increment: Number of errors to add
+        """
+        if status:
+            self.progress_data["status"] = status
+
+        if current_file is not None:
+            self.progress_data["current_file"] = current_file
+
+        self.progress_data["processed_files"] += processed_increment
+        self.progress_data["chunks_created"] += chunks_increment
+        self.progress_data["errors"] += error_increment
+
+        # Calculate ETA
+        elapsed = time.time() - self.start_time
+        processed = self.progress_data["processed_files"]
+        total = self.progress_data["total_files"]
+
+        if processed > 0 and total > processed:
+            rate = elapsed / processed
+            remaining = total - processed
+            self.progress_data["eta_seconds"] = int(rate * remaining)
+        else:
+            self.progress_data["eta_seconds"] = 0
+
+        self._write_progress()
+
+    async def run(
+        self, force_reindex: bool = False, extensions: str | None = None
+    ) -> None:
+        """Run background indexing process.
+
+        Args:
+            force_reindex: Force reindexing of all files
+            extensions: Override file extensions (comma-separated)
+        """
+        try:
+            # Load project configuration
+            logger.info(f"Loading project configuration from {self.project_root}")
+            project_manager = ProjectManager(self.project_root)
+
+            if not project_manager.is_initialized():
+                raise RuntimeError(
+                    f"Project not initialized at {self.project_root}. Run 'mcp-vector-search init' first."
+                )
+
+            config = project_manager.load_config()
+
+            # Override extensions if provided
+            if extensions:
+                file_extensions = [ext.strip() for ext in extensions.split(",")]
+                file_extensions = [
+                    ext if ext.startswith(".") else f".{ext}" for ext in file_extensions
+                ]
+                config = config.model_copy(update={"file_extensions": file_extensions})
+
+            logger.info(f"File extensions: {', '.join(config.file_extensions)}")
+            logger.info(f"Embedding model: {config.embedding_model}")
+
+            # Setup embedding function and cache
+            from ...config.defaults import get_default_cache_path
+
+            cache_dir = (
+                get_default_cache_path(self.project_root)
+                if config.cache_embeddings
+                else None
+            )
+            embedding_function, cache = create_embedding_function(
+                model_name=config.embedding_model,
+                cache_dir=cache_dir,
+                cache_size=config.max_cache_size,
+            )
+
+            # Setup database
+            database = ChromaVectorDatabase(
+                persist_directory=config.index_path,
+                embedding_function=embedding_function,
+            )
+
+            # Setup indexer
+            indexer = SemanticIndexer(
+                database=database,
+                project_root=self.project_root,
+                config=config,
+            )
+
+            async with database:
+                # Get files to index
+                self._update_progress(status="scanning")
+                logger.info("Scanning for indexable files...")
+                indexable_files, files_to_index = await indexer.get_files_to_index(
+                    force_reindex=force_reindex
+                )
+
+                total_files = len(files_to_index)
+                self.progress_data["total_files"] = total_files
+                self._write_progress()
+
+                if total_files == 0:
+                    logger.info("No files need indexing")
+                    self._update_progress(status="completed")
+                    return
+
+                logger.info(f"Found {total_files} files to index")
+                self._update_progress(status="running")
+
+                # Index files with progress tracking
+                async for (
+                    file_path,
+                    chunks_added,
+                    success,
+                ) in indexer.index_files_with_progress(files_to_index, force_reindex):
+                    # Update progress
+                    try:
+                        relative_path = str(file_path.relative_to(self.project_root))
+                    except ValueError:
+                        relative_path = str(file_path)
+
+                    self._update_progress(
+                        current_file=relative_path,
+                        processed_increment=1,
+                        chunks_increment=chunks_added if success else 0,
+                        error_increment=0 if success else 1,
+                    )
+
+                # Rebuild directory index
+                try:
+                    logger.info("Rebuilding directory index...")
+                    chunk_stats = {}
+                    for file_path in files_to_index:
+                        try:
+                            mtime = os.path.getmtime(file_path)
+                            chunk_stats[str(file_path)] = {
+                                "modified": mtime,
+                                "chunks": 1,
+                            }
+                        except OSError:
+                            pass
+
+                    indexer.directory_index.rebuild_from_files(
+                        files_to_index, self.project_root, chunk_stats=chunk_stats
+                    )
+                    indexer.directory_index.save()
+                except Exception as e:
+                    logger.error(f"Failed to update directory index: {e}")
+
+                # Compute relationships
+                try:
+                    logger.info("Marking relationships for background computation...")
+                    self._update_progress(status="computing_relationships")
+                    all_chunks = await indexer.database.get_all_chunks()
+
+                    if len(all_chunks) > 0:
+                        # Use background=True to avoid blocking startup
+                        await indexer.relationship_store.compute_and_store(
+                            all_chunks, indexer.database, background=True
+                        )
+                        logger.info("✓ Relationships marked for background computation")
+                        logger.info(
+                            " → Relationships will be lazy-loaded during visualization"
+                        )
+                except Exception as e:
+                    logger.warning(f"Failed to compute relationships: {e}")
+
+            # Mark as completed
+            self._update_progress(status="completed", current_file=None)
+            logger.info(
+                f"Indexing completed: {self.progress_data['processed_files']} files, "
+                f"{self.progress_data['chunks_created']} chunks, "
+                f"{self.progress_data['errors']} errors"
+            )
+
+        except Exception as e:
+            logger.error(f"Background indexing failed: {e}", exc_info=True)
+            self._update_progress(status="failed")
+            raise
+
+    async def run_relationships_only(self) -> None:
+        """Run relationship computation only (skip file indexing).
+
+        This is used when user wants to compute relationships in background
+        after indexing has already completed.
+        """
+        try:
+            # Load project configuration
+            logger.info(f"Loading project configuration from {self.project_root}")
+            project_manager = ProjectManager(self.project_root)
+
+            if not project_manager.is_initialized():
+                raise RuntimeError(
+                    f"Project not initialized at {self.project_root}. Run 'mcp-vector-search init' first."
+                )
+
+            config = project_manager.load_config()
+
+            logger.info(f"Embedding model: {config.embedding_model}")
+
+            # Setup embedding function and cache
+            from ...config.defaults import get_default_cache_path
+
+            cache_dir = (
+                get_default_cache_path(self.project_root)
+                if config.cache_embeddings
+                else None
+            )
+            embedding_function, cache = create_embedding_function(
+                model_name=config.embedding_model,
+                cache_dir=cache_dir,
+                cache_size=config.max_cache_size,
+            )
+
+            # Setup database
+            database = ChromaVectorDatabase(
+                persist_directory=config.index_path,
+                embedding_function=embedding_function,
+            )
+
+            # Setup indexer (for relationship store access)
+            indexer = SemanticIndexer(
+                database=database,
+                project_root=self.project_root,
+                config=config,
+            )
+
+            async with database:
+                # Get chunks for relationship computation
+                self._update_progress(status="loading_chunks")
+                logger.info("Loading chunks from database...")
+                all_chunks = await indexer.database.get_all_chunks()
+
+                if len(all_chunks) == 0:
+                    logger.warning("No chunks found in database")
+                    self._update_progress(status="completed")
+                    return
+
+                logger.info(f"Found {len(all_chunks)} chunks")
+
+                # Compute relationships
+                logger.info("Computing semantic relationships...")
+                self._update_progress(status="computing_relationships")
+
+                rel_stats = await indexer.relationship_store.compute_and_store(
+                    all_chunks, indexer.database, background=False
+                )
+
+                logger.info(
+                    f"Computed {rel_stats['semantic_links']} semantic links "
+                    f"in {rel_stats['computation_time']:.1f}s"
+                )
+
+            # Mark as completed
+            self._update_progress(status="completed", current_file=None)
+            logger.info("Relationship computation completed")
+
+        except Exception as e:
+            logger.error(
+                f"Background relationship computation failed: {e}", exc_info=True
+            )
+            self._update_progress(status="failed")
+            raise
+
+
+def setup_logging(log_file: Path) -> None:
+    """Setup logging to file.
+
+    Args:
+        log_file: Path to log file
+    """
+    # Remove default handler
+    logger.remove()
+
+    # Add file handler
+    logger.add(
+        log_file,
+        format="{time:YYYY-MM-DD HH:mm:ss} [{level}] {message}",
+        level="INFO",
+        rotation="10 MB",
+        retention="7 days",
+    )
+
+    # Suppress noisy libraries
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+    logging.getLogger("chromadb").setLevel(logging.WARNING)
+
+
+def cleanup_progress_file(progress_file: Path) -> None:
+    """Clean up progress file on exit.
+
+    Args:
+        progress_file: Path to progress file
+    """
+    try:
+        if progress_file.exists():
+            progress_file.unlink()
+            logger.info("Cleaned up progress file")
+    except Exception as e:
+        logger.error(f"Failed to clean up progress file: {e}")
+
+
+def main() -> None:
+    """Main entry point for background indexing."""
+    parser = argparse.ArgumentParser(
+        description="Background indexing process for MCP Vector Search"
+    )
+    parser.add_argument(
+        "--project-root",
+        type=Path,
+        required=True,
+        help="Project root directory",
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Force reindexing of all files",
+    )
+    parser.add_argument(
+        "--extensions",
+        type=str,
+        help="Override file extensions (comma-separated)",
+    )
+    parser.add_argument(
+        "--relationships-only",
+        action="store_true",
+        help="Only compute relationships (skip file indexing)",
+    )
+
+    args = parser.parse_args()
+
+    # Setup paths
+    project_root = args.project_root.resolve()
+    config_dir = project_root / ".mcp-vector-search"
+
+    # Use different files for relationships-only mode
+    if args.relationships_only:
+        progress_file = config_dir / "relationships_progress.json"
+        log_file = config_dir / "relationships_background.log"
+    else:
+        progress_file = config_dir / "indexing_progress.json"
+        log_file = config_dir / "indexing_background.log"
+
+    # Setup logging
+    setup_logging(log_file)
+
+    if args.relationships_only:
+        logger.info(
+            f"Starting background relationship computation (PID: {os.getpid()})"
+        )
+    else:
+        logger.info(f"Starting background indexing (PID: {os.getpid()})")
+
+    logger.info(f"Project root: {project_root}")
+    logger.info(f"Force reindex: {args.force}")
+
+    # Create background indexer
+    bg_indexer = BackgroundIndexer(project_root, progress_file, log_file)
+
+    # Handle SIGTERM for graceful shutdown
+    def signal_handler(signum, frame):
+        logger.info(f"Received signal {signum}, shutting down...")
+        bg_indexer._update_progress(status="cancelled")
+        cleanup_progress_file(progress_file)
+        sys.exit(0)
+
+    signal.signal(signal.SIGTERM, signal_handler)
+    signal.signal(signal.SIGINT, signal_handler)
+
+    # Run indexing or relationships-only
+    try:
+        if args.relationships_only:
+            asyncio.run(bg_indexer.run_relationships_only())
+        else:
+            asyncio.run(
+                bg_indexer.run(force_reindex=args.force, extensions=args.extensions)
+            )
+    except KeyboardInterrupt:
+        logger.info("Interrupted by user")
+        bg_indexer._update_progress(status="cancelled")
+    except Exception as e:
+        logger.error(f"Fatal error: {e}", exc_info=True)
+        bg_indexer._update_progress(status="failed")
+        sys.exit(1)
+    finally:
+        # Keep progress file on completion for status command to read
+        pass


+if __name__ == "__main__":
+    main()
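
Note on the progress protocol: _write_progress publishes state with a write-to-temp-then-rename, so readers of indexing_progress.json always see either the previous snapshot or a complete new one, never a half-written file, and eta_seconds is a linear extrapolation (elapsed seconds per processed file times the files remaining). A minimal sketch of a consumer that polls this file, assuming only the field names visible in progress_data above (the read_indexing_progress helper is illustrative, not part of the package):

import json
import time
from pathlib import Path

def read_indexing_progress(project_root: Path) -> dict | None:
    """Return the latest progress snapshot, or None if absent/unreadable."""
    progress_file = project_root / ".mcp-vector-search" / "indexing_progress.json"
    try:
        return json.loads(progress_file.read_text())
    except (OSError, json.JSONDecodeError):
        # The file may not exist yet or may be briefly unreadable; treat as "no data".
        return None

while True:
    snapshot = read_indexing_progress(Path.cwd())
    if snapshot is None or snapshot["status"] in ("completed", "failed", "cancelled"):
        break
    print(
        f"{snapshot['status']}: {snapshot['processed_files']}/{snapshot['total_files']} files, "
        f"ETA {snapshot['eta_seconds']}s"
    )
    time.sleep(1)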
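
The module is built to run as its own process: it exposes main() behind an `if __name__ == "__main__"` guard, takes --project-root on the command line, and records its PID in the progress file so the parent can find it later. How the index command actually spawns it lives in cli/commands/index.py and is not shown in this diff; the following is only a sketch of the usual detached-spawn pattern (new session, output appended to the background log), with the spawn_background_indexing wiring assumed rather than taken from the package:

import subprocess
import sys
from pathlib import Path

def spawn_background_indexing(project_root: Path) -> int:
    """Hypothetical launcher; the package's real CLI wiring may differ."""
    log_file = project_root / ".mcp-vector-search" / "indexing_background.log"
    log_file.parent.mkdir(parents=True, exist_ok=True)
    with open(log_file, "a") as log:
        proc = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "mcp_vector_search.cli.commands.index_background",
                "--project-root",
                str(project_root),
            ],
            stdout=log,
            stderr=log,
            start_new_session=True,  # POSIX: detach from the controlling terminal
        )
    return proc.pid

A process started this way keeps running after the CLI exits, and the SIGTERM/SIGINT handlers in main() give it a clean cancellation path: status is set to "cancelled" and the progress file is removed.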