mcp-vector-search 0.4.14__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic.

Files changed (30)
  1. mcp_vector_search/__init__.py +2 -2
  2. mcp_vector_search/cli/commands/index.py +73 -31
  3. mcp_vector_search/cli/commands/init.py +189 -113
  4. mcp_vector_search/cli/commands/install.py +525 -113
  5. mcp_vector_search/cli/commands/mcp.py +201 -151
  6. mcp_vector_search/cli/commands/reset.py +41 -41
  7. mcp_vector_search/cli/commands/search.py +73 -14
  8. mcp_vector_search/cli/commands/status.py +51 -17
  9. mcp_vector_search/cli/didyoumean.py +254 -246
  10. mcp_vector_search/cli/main.py +114 -43
  11. mcp_vector_search/cli/output.py +152 -0
  12. mcp_vector_search/cli/suggestions.py +246 -197
  13. mcp_vector_search/core/database.py +81 -49
  14. mcp_vector_search/core/indexer.py +10 -4
  15. mcp_vector_search/core/search.py +17 -6
  16. mcp_vector_search/mcp/__main__.py +1 -1
  17. mcp_vector_search/mcp/server.py +211 -203
  18. mcp_vector_search/parsers/__init__.py +7 -0
  19. mcp_vector_search/parsers/dart.py +605 -0
  20. mcp_vector_search/parsers/html.py +413 -0
  21. mcp_vector_search/parsers/php.py +694 -0
  22. mcp_vector_search/parsers/registry.py +21 -1
  23. mcp_vector_search/parsers/ruby.py +678 -0
  24. mcp_vector_search/parsers/text.py +32 -26
  25. mcp_vector_search/utils/gitignore.py +72 -71
  26. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/METADATA +76 -5
  27. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/RECORD +30 -26
  28. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/WHEEL +0 -0
  29. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/entry_points.txt +0 -0
  30. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/database.py

@@ -100,7 +100,7 @@ class VectorDatabase(ABC):
     @abstractmethod
     async def health_check(self) -> bool:
         """Check database health and integrity.
-
+
         Returns:
             True if database is healthy, False otherwise
         """

@@ -145,7 +145,7 @@ class ChromaVectorDatabase(VectorDatabase):
 
         # Ensure directory exists
         self.persist_directory.mkdir(parents=True, exist_ok=True)
-
+
         # Check for corruption before initializing
         await self._detect_and_recover_corruption()
 

@@ -172,10 +172,19 @@ class ChromaVectorDatabase(VectorDatabase):
         except Exception as e:
             # Check if this is a corruption error
             error_msg = str(e).lower()
-            if any(indicator in error_msg for indicator in [
-                "pickle", "unpickling", "eof", "ran out of input",
-                "hnsw", "index", "deserialize", "corrupt"
-            ]):
+            if any(
+                indicator in error_msg
+                for indicator in [
+                    "pickle",
+                    "unpickling",
+                    "eof",
+                    "ran out of input",
+                    "hnsw",
+                    "index",
+                    "deserialize",
+                    "corrupt",
+                ]
+            ):
                 logger.warning(f"Detected index corruption: {e}")
                 # Try to recover
                 await self._recover_from_corruption()
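
All three touched classes apply the same corruption heuristic: lowercase the exception text and scan it for known substrings. A minimal standalone sketch of that idea, with a helper name (`is_corruption_error`) that is mine and not part of the package:

    # Distilled from the repeated checks in this diff; the helper itself is hypothetical.
    CORRUPTION_INDICATORS = (
        "pickle", "unpickling", "eof", "ran out of input",
        "hnsw", "index", "deserialize", "corrupt",
    )

    def is_corruption_error(exc: Exception) -> bool:
        """Return True if the exception text matches a known corruption marker."""
        error_msg = str(exc).lower()
        return any(indicator in error_msg for indicator in CORRUPTION_INDICATORS)

Broad markers such as "index" and "eof" will also match many unrelated errors, so the heuristic deliberately errs toward treating a failure as corruption.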

@@ -468,51 +477,57 @@ class ChromaVectorDatabase(VectorDatabase):
         """Detect and recover from index corruption proactively."""
         # Check for common corruption indicators in ChromaDB files
         chroma_db_path = self.persist_directory / "chroma.sqlite3"
-
+
         # If database doesn't exist yet, nothing to check
         if not chroma_db_path.exists():
             return
-
+
         # Check for HNSW index files that might be corrupted
         collection_path = self.persist_directory / "chroma-collections.parquet"
         index_path = self.persist_directory / "index"
-
+
         if index_path.exists():
             # Look for pickle files in the index
             pickle_files = list(index_path.glob("**/*.pkl"))
             pickle_files.extend(list(index_path.glob("**/*.pickle")))
-
+
             for pickle_file in pickle_files:
                 try:
                     # Try to read the pickle file to detect corruption
                     import pickle
-                    with open(pickle_file, 'rb') as f:
+
+                    with open(pickle_file, "rb") as f:
                         pickle.load(f)
                 except (EOFError, pickle.UnpicklingError, Exception) as e:
-                    logger.warning(f"Corrupted index file detected: {pickle_file} - {e}")
+                    logger.warning(
+                        f"Corrupted index file detected: {pickle_file} - {e}"
+                    )
                     await self._recover_from_corruption()
                     return
 
     async def _recover_from_corruption(self) -> None:
         """Recover from index corruption by rebuilding the index."""
         logger.info("Attempting to recover from index corruption...")
-
+
         # Create backup directory
-        backup_dir = self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        backup_dir = (
+            self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        )
         backup_dir.mkdir(exist_ok=True)
-
+
         # Backup current state (in case we need it)
         import time
+
         timestamp = int(time.time())
         backup_path = backup_dir / f"backup_{timestamp}"
-
+
         if self.persist_directory.exists():
             try:
                 shutil.copytree(self.persist_directory, backup_path)
                 logger.info(f"Created backup at {backup_path}")
             except Exception as e:
                 logger.warning(f"Could not create backup: {e}")
-
+
         # Clear the corrupted index
         if self.persist_directory.exists():
             try:
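
The recovery flow above is: copy the live index into a timestamped sibling directory, wipe it, and recreate it empty. A condensed sketch under the same naming scheme — the free function is my restatement of the method, and the `shutil.rmtree` clear step is an assumption, since the hunk truncates before showing how the directory is actually cleared:

    import shutil
    import time
    from pathlib import Path

    def backup_then_clear(persist_directory: Path) -> Path:
        """Back up the index to <name>_backup/backup_<timestamp>, then reset it."""
        backup_dir = persist_directory.parent / f"{persist_directory.name}_backup"
        backup_dir.mkdir(exist_ok=True)
        backup_path = backup_dir / f"backup_{int(time.time())}"
        if persist_directory.exists():
            shutil.copytree(persist_directory, backup_path)
            shutil.rmtree(persist_directory)  # assumed; the clear step is not shown
        persist_directory.mkdir(parents=True, exist_ok=True)
        return backup_path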

@@ -523,14 +538,14 @@ class ChromaVectorDatabase(VectorDatabase):
                 raise IndexCorruptionError(
                     f"Could not clear corrupted index: {e}"
                 ) from e
-
+
         # Recreate the directory
         self.persist_directory.mkdir(parents=True, exist_ok=True)
         logger.info("Index directory recreated. Please re-index your codebase.")
 
     async def health_check(self) -> bool:
         """Check database health and integrity.
-
+
         Returns:
             True if database is healthy, False otherwise
         """

@@ -539,35 +554,42 @@ class ChromaVectorDatabase(VectorDatabase):
             if not self._client or not self._collection:
                 logger.warning("Database not initialized")
                 return False
-
+
             # Try a simple operation to test the connection
             try:
                 # Attempt to get count - this will fail if index is corrupted
                 count = self._collection.count()
                 logger.debug(f"Health check passed: {count} chunks in database")
-
+
                 # Try a minimal query to ensure search works
                 self._collection.query(
-                    query_texts=["test"],
-                    n_results=1,
-                    include=["metadatas"]
+                    query_texts=["test"], n_results=1, include=["metadatas"]
                 )
-
+
                 return True
-
+
             except Exception as e:
                 error_msg = str(e).lower()
-                if any(indicator in error_msg for indicator in [
-                    "pickle", "unpickling", "eof", "ran out of input",
-                    "hnsw", "index", "deserialize", "corrupt"
-                ]):
+                if any(
+                    indicator in error_msg
+                    for indicator in [
+                        "pickle",
+                        "unpickling",
+                        "eof",
+                        "ran out of input",
+                        "hnsw",
+                        "index",
+                        "deserialize",
+                        "corrupt",
+                    ]
+                ):
                     logger.error(f"Index corruption detected during health check: {e}")
                     return False
                 else:
                     # Some other error
                     logger.warning(f"Health check failed: {e}")
                     return False
-
+
         except Exception as e:
             logger.error(f"Health check error: {e}")
             return False

@@ -793,7 +815,7 @@ class PooledChromaVectorDatabase(VectorDatabase):
                 file_types=file_type_counts,
                 index_size_mb=index_size_mb,
                 last_updated="unknown",  # ChromaDB doesn't track this
-                embedding_model="unknown"  # TODO: Track this in metadata
+                embedding_model="unknown",  # TODO: Track this in metadata
             )
 
         except Exception as e:

@@ -868,24 +890,31 @@ class PooledChromaVectorDatabase(VectorDatabase):
             pool_healthy = await self._pool.health_check()
             if not pool_healthy:
                 return False
-
+
             # Try a simple query to verify database integrity
             try:
                 async with self._pool.get_connection() as conn:
                     # Test basic operations
                     conn.collection.count()
                     conn.collection.query(
-                        query_texts=["test"],
-                        n_results=1,
-                        include=["metadatas"]
+                        query_texts=["test"], n_results=1, include=["metadatas"]
                     )
                 return True
             except Exception as e:
                 error_msg = str(e).lower()
-                if any(indicator in error_msg for indicator in [
-                    "pickle", "unpickling", "eof", "ran out of input",
-                    "hnsw", "index", "deserialize", "corrupt"
-                ]):
+                if any(
+                    indicator in error_msg
+                    for indicator in [
+                        "pickle",
+                        "unpickling",
+                        "eof",
+                        "ran out of input",
+                        "hnsw",
+                        "index",
+                        "deserialize",
+                        "corrupt",
+                    ]
+                ):
                     logger.error(f"Index corruption detected: {e}")
                     # Attempt recovery
                     await self._recover_from_corruption()
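
Both health checks, pooled and unpooled, run the same two probes: a count() and a one-result query, either of which will trip over a corrupted HNSW index. The shared core could be factored out roughly as below; the helper and its duck-typed collection argument are a sketch, not the package's API:

    def probe_collection(collection) -> None:
        """Raise if the cheapest read operations fail; callers treat that as corruption."""
        collection.count()
        collection.query(query_texts=["test"], n_results=1, include=["metadatas"])

The two callers differ only in how they obtain the collection: directly from self._collection, or via async with self._pool.get_connection() as conn.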

@@ -896,30 +925,33 @@ class PooledChromaVectorDatabase(VectorDatabase):
         except Exception as e:
             logger.error(f"Health check error: {e}")
             return False
-
+
     async def _recover_from_corruption(self) -> None:
         """Recover from index corruption by rebuilding the index."""
         logger.info("Attempting to recover from index corruption...")
-
+
         # Close the pool first
         await self._pool.close()
-
+
         # Create backup directory
-        backup_dir = self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        backup_dir = (
+            self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        )
         backup_dir.mkdir(exist_ok=True)
-
+
         # Backup current state
         import time
+
         timestamp = int(time.time())
         backup_path = backup_dir / f"backup_{timestamp}"
-
+
         if self.persist_directory.exists():
             try:
                 shutil.copytree(self.persist_directory, backup_path)
                 logger.info(f"Created backup at {backup_path}")
             except Exception as e:
                 logger.warning(f"Could not create backup: {e}")
-
+
         # Clear the corrupted index
         if self.persist_directory.exists():
             try:

@@ -930,10 +962,10 @@ class PooledChromaVectorDatabase(VectorDatabase):
                 raise IndexCorruptionError(
                     f"Could not clear corrupted index: {e}"
                 ) from e
-
+
         # Recreate the directory
         self.persist_directory.mkdir(parents=True, exist_ok=True)
-
+
         # Reinitialize the pool
         await self._pool.initialize()
         logger.info("Index recovered. Please re-index your codebase.")

mcp_vector_search/core/indexer.py

@@ -9,7 +9,7 @@ from loguru import logger
 
 from ..config.defaults import DEFAULT_IGNORE_PATTERNS
 from ..parsers.registry import get_parser_registry
-from ..utils.gitignore import create_gitignore_parser, GitignoreParser
+from ..utils.gitignore import create_gitignore_parser
 from .database import VectorDatabase
 from .exceptions import ParsingError
 from .models import CodeChunk

@@ -51,7 +51,9 @@ class SemanticIndexer:
         # Initialize gitignore parser
         try:
             self.gitignore_parser = create_gitignore_parser(project_root)
-            logger.debug(f"Loaded {len(self.gitignore_parser.patterns)} gitignore patterns")
+            logger.debug(
+                f"Loaded {len(self.gitignore_parser.patterns)} gitignore patterns"
+            )
         except Exception as e:
             logger.warning(f"Failed to load gitignore patterns: {e}")
             self.gitignore_parser = None

@@ -376,14 +378,18 @@ class SemanticIndexer:
         # Check each part of the path against default ignore patterns
         for part in relative_path.parts:
             if part in self._ignore_patterns:
-                logger.debug(f"Path ignored by default pattern '{part}': {file_path}")
+                logger.debug(
+                    f"Path ignored by default pattern '{part}': {file_path}"
+                )
                 return True
 
         # Check if any parent directory should be ignored
         for parent in relative_path.parents:
             for part in parent.parts:
                 if part in self._ignore_patterns:
-                    logger.debug(f"Path ignored by parent pattern '{part}': {file_path}")
+                    logger.debug(
+                        f"Path ignored by parent pattern '{part}': {file_path}"
+                    )
                     return True
 
         return False
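
The ignore check tests every component of the relative path against the default patterns. A compact equivalent, assuming the same `_ignore_patterns` set (the free function is my restatement):

    from pathlib import Path

    def is_ignored(file_path: Path, project_root: Path, ignore_patterns: set[str]) -> bool:
        """True if any path component matches a default ignore pattern."""
        relative_path = file_path.relative_to(project_root)
        return any(part in ignore_patterns for part in relative_path.parts)

Note that relative_path.parts already contains every ancestor directory name, so the second loop over relative_path.parents in the original appears to re-test the same components; it changes which debug message is logged rather than the outcome.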

mcp_vector_search/core/search.py

@@ -68,7 +68,7 @@ class SemanticSearchEngine:
 
         # Health check before search
         try:
-            if hasattr(self.database, 'health_check'):
+            if hasattr(self.database, "health_check"):
                 is_healthy = await self.database.health_check()
                 if not is_healthy:
                     logger.warning("Database health check failed - attempting recovery")

@@ -118,12 +118,23 @@ class SemanticSearchEngine:
         except Exception as e:
             error_msg = str(e).lower()
             # Check for corruption indicators
-            if any(indicator in error_msg for indicator in [
-                "pickle", "unpickling", "eof", "ran out of input",
-                "hnsw", "index", "deserialize", "corrupt"
-            ]):
+            if any(
+                indicator in error_msg
+                for indicator in [
+                    "pickle",
+                    "unpickling",
+                    "eof",
+                    "ran out of input",
+                    "hnsw",
+                    "index",
+                    "deserialize",
+                    "corrupt",
+                ]
+            ):
                 logger.error(f"Index corruption detected during search: {e}")
-                logger.info("The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it.")
+                logger.info(
+                    "The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it."
+                )
                 raise SearchError(
                     "Index corruption detected. Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
                 ) from e
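
From the caller's side, the new failure mode is an explicit SearchError whose message names the fix. A hypothetical handler — the engine object, its search signature, and the exception's import path are assumptions based on the class and imports shown in this diff:

    from mcp_vector_search.core.exceptions import SearchError  # assumed location

    async def safe_search(engine, query: str) -> list:
        try:
            return await engine.search(query)
        except SearchError:
            # Mirrors the advice logged above: reset, then rebuild the index.
            print("Run 'mcp-vector-search reset', then 'mcp-vector-search index'.")
            return []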

mcp_vector_search/mcp/__main__.py

@@ -11,7 +11,7 @@ def main():
     """Main entry point for the MCP server."""
     # Allow specifying project root as command line argument
     project_root = Path(sys.argv[1]) if len(sys.argv) > 1 else None
-
+
     try:
         asyncio.run(run_mcp_server(project_root))
     except KeyboardInterrupt: