mcp-vector-search 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
1
  """MCP Vector Search - CLI-first semantic code search with MCP integration."""
2
2
 
3
- __version__ = "0.5.1"
4
- __build__ = "19"
3
+ __version__ = "0.6.1"
4
+ __build__ = "21"
5
5
  __author__ = "Robert Matsuoka"
6
6
  __email__ = "bobmatnyc@gmail.com"
7
7
 
@@ -22,11 +22,14 @@ from ..output import (
22
22
  print_tip,
23
23
  )
24
24
 
25
- # Create index subcommand app
26
- index_app = typer.Typer(help="Index codebase for semantic search")
25
+ # Create index subcommand app with callback for direct usage
26
+ index_app = typer.Typer(
27
+ help="Index codebase for semantic search",
28
+ invoke_without_command=True,
29
+ )
27
30
 
28
31
 
29
- @index_app.command()
32
+ @index_app.callback(invoke_without_command=True)
30
33
  def main(
31
34
  ctx: typer.Context,
32
35
  watch: bool = typer.Option(
@@ -95,8 +98,12 @@ def main(
95
98
 
96
99
  [dim]💡 Tip: Use incremental indexing (default) for faster updates on subsequent runs.[/dim]
97
100
  """
101
+ # If a subcommand was invoked, don't run the indexing logic
102
+ if ctx.invoked_subcommand is not None:
103
+ return
104
+
98
105
  try:
99
- project_root = ctx.obj.get("project_root") or Path.cwd()
106
+ project_root = (ctx.obj.get("project_root") if ctx.obj else None) or Path.cwd()
100
107
 
101
108
  # Run async indexing
102
109
  asyncio.run(
@@ -5,6 +5,11 @@ from pathlib import Path
5
5
  import typer
6
6
  from loguru import logger
7
7
 
8
+ from ...config.constants import (
9
+ SUBPROCESS_INSTALL_TIMEOUT,
10
+ SUBPROCESS_MCP_TIMEOUT,
11
+ SUBPROCESS_TEST_TIMEOUT,
12
+ )
8
13
  from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS
9
14
  from ...core.exceptions import ProjectInitializationError
10
15
  from ...core.project import ProjectManager
@@ -150,11 +155,15 @@ def main(
150
155
  install_cmd = ["pip", "install", "-e", str(dev_source_path)]
151
156
  try:
152
157
  result = subprocess.run(
153
- install_cmd, capture_output=True, text=True, timeout=120
158
+ install_cmd,
159
+ capture_output=True,
160
+ text=True,
161
+ timeout=SUBPROCESS_INSTALL_TIMEOUT,
154
162
  )
155
163
  if result.returncode == 0:
156
164
  install_success = True
157
- except:
165
+ except (subprocess.TimeoutExpired, OSError, ValueError) as e:
166
+ logger.debug(f"pip install method failed: {e}")
158
167
  pass
159
168
 
160
169
  # Method 2: Try python -m pip
@@ -169,11 +178,15 @@ def main(
169
178
  ]
170
179
  try:
171
180
  result = subprocess.run(
172
- install_cmd, capture_output=True, text=True, timeout=120
181
+ install_cmd,
182
+ capture_output=True,
183
+ text=True,
184
+ timeout=SUBPROCESS_INSTALL_TIMEOUT,
173
185
  )
174
186
  if result.returncode == 0:
175
187
  install_success = True
176
- except:
188
+ except (subprocess.TimeoutExpired, OSError, ValueError) as e:
189
+ logger.debug(f"python -m pip install method failed: {e}")
177
190
  pass
178
191
 
179
192
  # Method 3: Try uv if available
@@ -181,11 +194,15 @@ def main(
181
194
  install_cmd = ["uv", "add", "--editable", str(dev_source_path)]
182
195
  try:
183
196
  result = subprocess.run(
184
- install_cmd, capture_output=True, text=True, timeout=120
197
+ install_cmd,
198
+ capture_output=True,
199
+ text=True,
200
+ timeout=SUBPROCESS_INSTALL_TIMEOUT,
185
201
  )
186
202
  if result.returncode == 0:
187
203
  install_success = True
188
- except:
204
+ except (subprocess.TimeoutExpired, OSError, ValueError) as e:
205
+ logger.debug(f"uv add method failed: {e}")
189
206
  pass
190
207
 
191
208
  if install_success:
@@ -502,7 +519,10 @@ async def run_init_setup(
502
519
  ] + server_command.split()
503
520
 
504
521
  result = subprocess.run(
505
- cmd_args, capture_output=True, text=True, timeout=30
522
+ cmd_args,
523
+ capture_output=True,
524
+ text=True,
525
+ timeout=SUBPROCESS_MCP_TIMEOUT,
506
526
  )
507
527
 
508
528
  if result.returncode == 0:
@@ -663,7 +683,7 @@ def _test_mcp_server(project_root: Path) -> None:
663
683
 
664
684
  try:
665
685
  stdout, stderr = test_process.communicate(
666
- input=json.dumps(init_request) + "\n", timeout=10
686
+ input=json.dumps(init_request) + "\n", timeout=SUBPROCESS_TEST_TIMEOUT
667
687
  )
668
688
 
669
689
  if test_process.returncode == 0:
@@ -40,22 +40,22 @@ def detect_ai_tools() -> dict[str, Path]:
40
40
  Dictionary mapping tool names to their config file paths.
41
41
  For Claude Code, returns a placeholder path since it uses project-scoped .mcp.json
42
42
  """
43
- HOME = Path.home()
43
+ home = Path.home()
44
44
 
45
- CONFIG_LOCATIONS = {
46
- "claude-desktop": HOME
45
+ config_locations = {
46
+ "claude-desktop": home
47
47
  / "Library"
48
48
  / "Application Support"
49
49
  / "Claude"
50
50
  / "claude_desktop_config.json",
51
- "cursor": HOME / ".cursor" / "mcp.json",
52
- "windsurf": HOME / ".codeium" / "windsurf" / "mcp_config.json",
53
- "vscode": HOME / ".vscode" / "mcp.json",
51
+ "cursor": home / ".cursor" / "mcp.json",
52
+ "windsurf": home / ".codeium" / "windsurf" / "mcp_config.json",
53
+ "vscode": home / ".vscode" / "mcp.json",
54
54
  }
55
55
 
56
56
  # Return only tools with existing config files
57
57
  detected_tools = {}
58
- for tool_name, config_path in CONFIG_LOCATIONS.items():
58
+ for tool_name, config_path in config_locations.items():
59
59
  if config_path.exists():
60
60
  detected_tools[tool_name] = config_path
61
61
 
@@ -587,12 +587,12 @@ def main():
587
587
 
588
588
  class UserService:
589
589
  '''Service for managing users.'''
590
-
590
+
591
591
  def create_user(self, name: str, email: str):
592
592
  '''Create a new user with the given name and email.'''
593
593
  print(f"Creating user: {name} ({email})")
594
594
  return {"name": name, "email": email}
595
-
595
+
596
596
  def authenticate_user(self, email: str, password: str):
597
597
  '''Authenticate user with email and password.'''
598
598
  # Simple authentication logic
@@ -83,7 +83,7 @@ def reset_index(
83
83
  raise typer.Exit(0)
84
84
 
85
85
  # Get the database directory
86
- config = project_manager.load_config()
86
+ project_manager.load_config()
87
87
  db_path = root / ".mcp_vector_search" / "db"
88
88
 
89
89
  if not db_path.exists():
@@ -9,6 +9,7 @@ from loguru import logger
9
9
  from ...core.database import ChromaVectorDatabase
10
10
  from ...core.embeddings import create_embedding_function
11
11
  from ...core.exceptions import ProjectNotFoundError
12
+ from ...core.indexer import SemanticIndexer
12
13
  from ...core.project import ProjectManager
13
14
  from ...core.search import SemanticSearchEngine
14
15
  from ..didyoumean import create_enhanced_typer
@@ -258,6 +259,47 @@ async def run_search(
258
259
  embedding_function=embedding_function,
259
260
  )
260
261
 
262
+ # Create indexer for version check
263
+ indexer = SemanticIndexer(
264
+ database=database,
265
+ project_root=project_root,
266
+ file_extensions=config.file_extensions,
267
+ )
268
+
269
+ # Check if reindex is needed due to version upgrade
270
+ if config.auto_reindex_on_upgrade and indexer.needs_reindex_for_version():
271
+ from ..output import console
272
+
273
+ index_version = indexer.get_index_version()
274
+ from ... import __version__
275
+
276
+ if index_version:
277
+ console.print(
278
+ f"[yellow]⚠️ Index created with version {index_version} (current: {__version__})[/yellow]"
279
+ )
280
+ else:
281
+ console.print(
282
+ "[yellow]⚠️ Index version not found (legacy format detected)[/yellow]"
283
+ )
284
+
285
+ console.print(
286
+ "[yellow] Reindexing to take advantage of improvements...[/yellow]"
287
+ )
288
+
289
+ # Auto-reindex with progress
290
+ try:
291
+ indexed_count = await indexer.index_project(
292
+ force_reindex=True, show_progress=False
293
+ )
294
+ console.print(
295
+ f"[green]✓ Index updated to version {__version__} ({indexed_count} files reindexed)[/green]\n"
296
+ )
297
+ except Exception as e:
298
+ console.print(f"[red]✗ Reindexing failed: {e}[/red]")
299
+ console.print(
300
+ "[yellow] Continuing with existing index (may have outdated patterns)[/yellow]\n"
301
+ )
302
+
261
303
  search_engine = SemanticSearchEngine(
262
304
  database=database,
263
305
  project_root=project_root,
@@ -9,6 +9,7 @@ from typing import Any
9
9
  import typer
10
10
  from loguru import logger
11
11
 
12
+ from ... import __version__
12
13
  from ...core.database import ChromaVectorDatabase
13
14
  from ...core.embeddings import create_embedding_function
14
15
  from ...core.exceptions import ProjectNotFoundError
@@ -165,6 +166,10 @@ async def show_status(
165
166
  index_stats = await indexer.get_indexing_stats()
166
167
  db_stats = await database.get_stats()
167
168
 
169
+ # Get version information
170
+ index_version = indexer.get_index_version()
171
+ needs_reindex = indexer.needs_reindex_for_version()
172
+
168
173
  # Compile status data
169
174
  status_data = {
170
175
  "project": {
@@ -181,6 +186,7 @@ async def show_status(
181
186
  "max_chunk_size": config.max_chunk_size,
182
187
  "cache_embeddings": config.cache_embeddings,
183
188
  "watch_files": config.watch_files,
189
+ "auto_reindex_on_upgrade": config.auto_reindex_on_upgrade,
184
190
  },
185
191
  "index": {
186
192
  "total_files": index_stats.get("total_indexable_files", 0),
@@ -189,6 +195,9 @@ async def show_status(
189
195
  "languages": index_stats.get("languages", {}),
190
196
  "index_size_mb": db_stats.index_size_mb,
191
197
  "last_updated": db_stats.last_updated,
198
+ "index_version": index_version,
199
+ "current_version": __version__,
200
+ "needs_reindex": needs_reindex,
192
201
  },
193
202
  }
194
203
 
@@ -267,12 +276,36 @@ def _display_status(
267
276
  console.print(f" Total Chunks: {index_data['total_chunks']}")
268
277
  console.print(f" Index Size: {index_data['index_size_mb']:.2f} MB")
269
278
 
279
+ # Version information
280
+ index_version = index_data.get("index_version")
281
+ current_version = index_data.get("current_version", __version__)
282
+ needs_reindex = index_data.get("needs_reindex", False)
283
+
284
+ if index_version:
285
+ if needs_reindex:
286
+ console.print(
287
+ f" Version: [yellow]{index_version}[/yellow] (current: {current_version}) [yellow]⚠️ Reindex recommended[/yellow]"
288
+ )
289
+ else:
290
+ console.print(f" Version: [green]{index_version}[/green] (up to date)")
291
+ else:
292
+ console.print(
293
+ f" Version: [yellow]Not tracked[/yellow] (current: {current_version}) [yellow]⚠️ Reindex recommended[/yellow]"
294
+ )
295
+
270
296
  if index_data["languages"]:
271
297
  console.print(" Language Distribution:")
272
298
  for lang, count in index_data["languages"].items():
273
299
  console.print(f" {lang}: {count} chunks")
274
300
  console.print()
275
301
 
302
+ # Show reindex recommendation if needed
303
+ if needs_reindex:
304
+ console.print(
305
+ "[yellow]💡 Tip: Run 'mcp-vector-search index' to reindex with the latest improvements[/yellow]"
306
+ )
307
+ console.print()
308
+
276
309
  # Health check results
277
310
  if "health" in status_data:
278
311
  health_data = status_data["health"]
@@ -266,7 +266,6 @@ COMMON_TYPOS = {
266
266
  "context": "mcp",
267
267
  "m": "mcp", # Single letter shortcut
268
268
  # Install command variations
269
- "setup": "install",
270
269
  "deploy": "install",
271
270
  "add": "install",
272
271
  "instal": "install", # Common typo
@@ -471,7 +470,7 @@ def add_common_suggestions(ctx: click.Context, command_name: str) -> None:
471
470
  console.print("\n[yellow]Did you mean one of these?[/yellow]")
472
471
 
473
472
  # Show up to 3 best matches
474
- for cmd, ratio in fuzzy_matches[:3]:
473
+ for cmd, _ratio in fuzzy_matches[:3]:
475
474
  console.print(format_command_suggestion(cmd, show_examples=False))
476
475
 
477
476
  # Show example for the best match
@@ -184,8 +184,9 @@ class SearchHistory:
184
184
 
185
185
  self._save_favorites(favorites)
186
186
 
187
- except Exception:
187
+ except Exception as e:
188
188
  # Don't show error for this non-critical operation
189
+ logger.debug(f"Failed to update history ranking: {e}")
189
190
  pass
190
191
 
191
192
  def _load_history(self) -> list[dict[str, Any]]:
@@ -196,7 +197,8 @@ class SearchHistory:
196
197
  try:
197
198
  with open(self.history_file, encoding="utf-8") as f:
198
199
  return json.load(f)
199
- except Exception:
200
+ except Exception as e:
201
+ logger.debug(f"Failed to load history file: {e}")
200
202
  return []
201
203
 
202
204
  def _save_history(self, history: list[dict[str, Any]]) -> None:
@@ -212,7 +214,8 @@ class SearchHistory:
212
214
  try:
213
215
  with open(self.favorites_file, encoding="utf-8") as f:
214
216
  return json.load(f)
215
- except Exception:
217
+ except Exception as e:
218
+ logger.debug(f"Failed to load favorites file: {e}")
216
219
  return []
217
220
 
218
221
  def _save_favorites(self, favorites: list[dict[str, Any]]) -> None:
@@ -74,10 +74,10 @@ unfamiliar codebases, finding similar patterns, and integrating with AI tools.
74
74
  )
75
75
 
76
76
  # Import command functions for direct registration and aliases
77
- from .commands.index import main as index_main
78
- from .commands.install import demo as install_demo
79
- from .commands.install import main as install_main
80
- from .commands.status import main as status_main
77
+ from .commands.index import main as index_main # noqa: E402
78
+ from .commands.install import demo as install_demo # noqa: E402
79
+ from .commands.install import main as install_main # noqa: E402
80
+ from .commands.status import main as status_main # noqa: E402
81
81
 
82
82
  # Note: config doesn't have a main function, it uses subcommands via config_app
83
83
  app.command("install", help="🚀 Install mcp-vector-search in projects")(install_main)
@@ -377,8 +377,9 @@ def handle_command_error(ctx, param, value):
377
377
  try:
378
378
  project_root = ctx.obj.get("project_root") if ctx.obj else None
379
379
  get_contextual_suggestions(project_root, command_name)
380
- except Exception:
380
+ except Exception as e:
381
381
  # If contextual suggestions fail, don't break the error flow
382
+ logger.debug(f"Failed to get contextual suggestions: {e}")
382
383
  pass
383
384
  raise
384
385
 
@@ -393,7 +394,8 @@ def help_contextual() -> None:
393
394
  )
394
395
  console.print("[dim]CLI-first semantic code search with MCP integration[/dim]")
395
396
  get_contextual_suggestions(project_root)
396
- except Exception:
397
+ except Exception as e:
398
+ logger.debug(f"Failed to show contextual help: {e}")
397
399
  console.print(
398
400
  "\n[dim]Use [bold]mcp-vector-search --help[/bold] for more information.[/dim]"
399
401
  )
@@ -457,7 +459,10 @@ def cli_with_suggestions():
457
459
  try:
458
460
  project_root = Path.cwd()
459
461
  get_contextual_suggestions(project_root, command_name)
460
- except Exception:
462
+ except Exception as e:
463
+ logger.debug(
464
+ f"Failed to get contextual suggestions for error handling: {e}"
465
+ )
461
466
  pass
462
467
 
463
468
  sys.exit(2) # Exit with error code
@@ -73,8 +73,9 @@ class ContextualSuggestionProvider:
73
73
  # For now, we'll assume false
74
74
  state["has_recent_changes"] = False
75
75
 
76
- except Exception:
76
+ except Exception as e:
77
77
  # If we can't determine state, provide conservative defaults
78
+ logger.debug(f"Failed to determine project state for suggestions: {e}")
78
79
  pass
79
80
 
80
81
  return state
@@ -0,0 +1,24 @@
1
+ """Project-wide constants for MCP Vector Search.
2
+
3
+ This module contains all magic numbers and configuration constants
4
+ used throughout the application to improve maintainability and clarity.
5
+ """
6
+
7
+ # Timeout Constants (in seconds)
8
+ SUBPROCESS_INSTALL_TIMEOUT = 120 # Timeout for package installation commands
9
+ SUBPROCESS_SHORT_TIMEOUT = 10 # Short timeout for quick commands (version checks, etc.)
10
+ SUBPROCESS_MCP_TIMEOUT = 30 # Timeout for MCP server operations
11
+ SUBPROCESS_TEST_TIMEOUT = 5 # Timeout for server test operations
12
+ CONNECTION_POOL_TIMEOUT = 30.0 # Connection pool acquisition timeout
13
+
14
+ # Chunking Constants
15
+ DEFAULT_CHUNK_SIZE = 50 # Default number of lines per code chunk
16
+ TEXT_CHUNK_SIZE = 30 # Number of lines per text/markdown chunk
17
+ SEARCH_RESULT_LIMIT = 20 # Default number of search results to return
18
+
19
+ # Threshold Constants
20
+ DEFAULT_SIMILARITY_THRESHOLD = 0.5 # Default similarity threshold for search (0.0-1.0)
21
+ HIGH_SIMILARITY_THRESHOLD = 0.75 # Higher threshold for more precise matches
22
+
23
+ # Cache Constants
24
+ DEFAULT_CACHE_SIZE = 256 # Default LRU cache size for file reads
@@ -108,6 +108,8 @@ DEFAULT_IGNORE_PATTERNS = [
108
108
  ".hg",
109
109
  "__pycache__",
110
110
  ".pytest_cache",
111
+ ".mypy_cache", # mypy type checking cache
112
+ ".ruff_cache", # ruff linter cache
111
113
  "node_modules",
112
114
  ".venv",
113
115
  "venv",
@@ -120,6 +122,8 @@ DEFAULT_IGNORE_PATTERNS = [
120
122
  "*.egg-info",
121
123
  ".DS_Store",
122
124
  "Thumbs.db",
125
+ ".claude-mpm", # Claude MPM directory
126
+ ".mcp-vector-search", # Our own index directory
123
127
  ]
124
128
 
125
129
  # File patterns to ignore
@@ -37,6 +37,10 @@ class ProjectConfig(BaseSettings):
37
37
  max_cache_size: int = Field(
38
38
  default=1000, gt=0, description="Maximum number of cached embeddings"
39
39
  )
40
+ auto_reindex_on_upgrade: bool = Field(
41
+ default=True,
42
+ description="Automatically reindex when tool version is upgraded (minor/major versions)",
43
+ )
40
44
 
41
45
  @field_validator("project_root", "index_path", mode="before")
42
46
  @classmethod
@@ -483,7 +483,7 @@ class ChromaVectorDatabase(VectorDatabase):
483
483
  return
484
484
 
485
485
  # Check for HNSW index files that might be corrupted
486
- collection_path = self.persist_directory / "chroma-collections.parquet"
486
+ self.persist_directory / "chroma-collections.parquet"
487
487
  index_path = self.persist_directory / "index"
488
488
 
489
489
  if index_path.exists():
@@ -60,16 +60,24 @@ class ComponentFactory:
60
60
  def create_database(
61
61
  config: ProjectConfig,
62
62
  embedding_function: CodeBERTEmbeddingFunction,
63
- use_pooling: bool = False,
63
+ use_pooling: bool = True, # Enable pooling by default for 13.6% performance boost
64
64
  **pool_kwargs,
65
65
  ) -> VectorDatabase:
66
66
  """Create vector database."""
67
67
  if use_pooling:
68
+ # Set default pool parameters if not provided
69
+ pool_defaults = {
70
+ "max_connections": 10,
71
+ "min_connections": 2,
72
+ "max_idle_time": 300.0,
73
+ }
74
+ pool_defaults.update(pool_kwargs)
75
+
68
76
  return PooledChromaVectorDatabase(
69
77
  persist_directory=config.index_path,
70
78
  embedding_function=embedding_function,
71
79
  collection_name="code_search",
72
- **pool_kwargs,
80
+ **pool_defaults,
73
81
  )
74
82
  else:
75
83
  return ChromaVectorDatabase(
@@ -124,7 +132,7 @@ class ComponentFactory:
124
132
  @staticmethod
125
133
  async def create_standard_components(
126
134
  project_root: Path,
127
- use_pooling: bool = False,
135
+ use_pooling: bool = True, # Enable pooling by default for performance
128
136
  include_search_engine: bool = False,
129
137
  include_auto_indexer: bool = False,
130
138
  similarity_threshold: float = 0.7,
@@ -3,10 +3,13 @@
3
3
  import asyncio
4
4
  import json
5
5
  import os
6
+ from datetime import UTC, datetime
6
7
  from pathlib import Path
7
8
 
8
9
  from loguru import logger
10
+ from packaging import version
9
11
 
12
+ from .. import __version__
10
13
  from ..config.defaults import DEFAULT_IGNORE_PATTERNS
11
14
  from ..parsers.registry import get_parser_registry
12
15
  from ..utils.gitignore import create_gitignore_parser
@@ -40,9 +43,15 @@ class SemanticIndexer:
40
43
  self.file_extensions = {ext.lower() for ext in file_extensions}
41
44
  self.parser_registry = get_parser_registry()
42
45
  self._ignore_patterns = set(DEFAULT_IGNORE_PATTERNS)
43
- self.max_workers = max_workers or min(
44
- 4, (asyncio.get_event_loop().get_debug() and 1) or 4
45
- )
46
+
47
+ # Safely get event loop for max_workers
48
+ try:
49
+ loop = asyncio.get_event_loop()
50
+ self.max_workers = max_workers or min(4, (loop.get_debug() and 1) or 4)
51
+ except RuntimeError:
52
+ # No event loop in current thread
53
+ self.max_workers = max_workers or 4
54
+
46
55
  self.batch_size = batch_size
47
56
  self._index_metadata_file = (
48
57
  project_root / ".mcp-vector-search" / "index_metadata.json"
@@ -182,7 +191,13 @@ class SemanticIndexer:
182
191
 
183
192
  try:
184
193
  with open(self._index_metadata_file) as f:
185
- return json.load(f)
194
+ data = json.load(f)
195
+ # Handle legacy format (just file_mtimes dict) and new format
196
+ if "file_mtimes" in data:
197
+ return data["file_mtimes"]
198
+ else:
199
+ # Legacy format - just return as-is
200
+ return data
186
201
  except Exception as e:
187
202
  logger.warning(f"Failed to load index metadata: {e}")
188
203
  return {}
@@ -197,8 +212,15 @@ class SemanticIndexer:
197
212
  # Ensure directory exists
198
213
  self._index_metadata_file.parent.mkdir(parents=True, exist_ok=True)
199
214
 
215
+ # New metadata format with version tracking
216
+ data = {
217
+ "index_version": __version__,
218
+ "indexed_at": datetime.now(UTC).isoformat(),
219
+ "file_mtimes": metadata,
220
+ }
221
+
200
222
  with open(self._index_metadata_file, "w") as f:
201
- json.dump(metadata, f, indent=2)
223
+ json.dump(data, f, indent=2)
202
224
  except Exception as e:
203
225
  logger.warning(f"Failed to save index metadata: {e}")
204
226
 
@@ -447,6 +469,59 @@ class SemanticIndexer:
447
469
  """
448
470
  return self._ignore_patterns.copy()
449
471
 
472
+ def get_index_version(self) -> str | None:
473
+ """Get the version of the tool that created the current index.
474
+
475
+ Returns:
476
+ Version string or None if not available
477
+ """
478
+ if not self._index_metadata_file.exists():
479
+ return None
480
+
481
+ try:
482
+ with open(self._index_metadata_file) as f:
483
+ data = json.load(f)
484
+ return data.get("index_version")
485
+ except Exception as e:
486
+ logger.warning(f"Failed to read index version: {e}")
487
+ return None
488
+
489
+ def needs_reindex_for_version(self) -> bool:
490
+ """Check if reindex is needed due to version upgrade.
491
+
492
+ Returns:
493
+ True if reindex is needed for version compatibility
494
+ """
495
+ index_version = self.get_index_version()
496
+
497
+ if not index_version:
498
+ # No version recorded - this is either a new index or legacy format
499
+ # Reindex to establish version tracking
500
+ return True
501
+
502
+ try:
503
+ current = version.parse(__version__)
504
+ indexed = version.parse(index_version)
505
+
506
+ # Reindex on major or minor version change
507
+ # Patch versions (0.5.1 -> 0.5.2) don't require reindex
508
+ needs_reindex = (
509
+ current.major != indexed.major or current.minor != indexed.minor
510
+ )
511
+
512
+ if needs_reindex:
513
+ logger.info(
514
+ f"Version upgrade detected: {index_version} -> {__version__} "
515
+ f"(reindex recommended)"
516
+ )
517
+
518
+ return needs_reindex
519
+
520
+ except Exception as e:
521
+ logger.warning(f"Failed to compare versions: {e}")
522
+ # If we can't parse versions, be safe and reindex
523
+ return True
524
+
450
525
  async def get_indexing_stats(self) -> dict:
451
526
  """Get statistics about the indexing process.
452
527