mcp-vector-search 0.12.6__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. mcp_vector_search/__init__.py +2 -2
  2. mcp_vector_search/analysis/__init__.py +64 -0
  3. mcp_vector_search/analysis/collectors/__init__.py +39 -0
  4. mcp_vector_search/analysis/collectors/base.py +164 -0
  5. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  6. mcp_vector_search/analysis/metrics.py +341 -0
  7. mcp_vector_search/analysis/reporters/__init__.py +5 -0
  8. mcp_vector_search/analysis/reporters/console.py +222 -0
  9. mcp_vector_search/cli/commands/analyze.py +408 -0
  10. mcp_vector_search/cli/commands/chat.py +1262 -0
  11. mcp_vector_search/cli/commands/index.py +21 -3
  12. mcp_vector_search/cli/commands/init.py +13 -0
  13. mcp_vector_search/cli/commands/install.py +597 -335
  14. mcp_vector_search/cli/commands/install_old.py +8 -4
  15. mcp_vector_search/cli/commands/mcp.py +78 -6
  16. mcp_vector_search/cli/commands/reset.py +68 -26
  17. mcp_vector_search/cli/commands/search.py +30 -7
  18. mcp_vector_search/cli/commands/setup.py +1133 -0
  19. mcp_vector_search/cli/commands/status.py +37 -2
  20. mcp_vector_search/cli/commands/uninstall.py +276 -357
  21. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  22. mcp_vector_search/cli/commands/visualize/cli.py +276 -0
  23. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  24. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  25. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  26. mcp_vector_search/cli/commands/visualize/graph_builder.py +714 -0
  27. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  28. mcp_vector_search/cli/commands/visualize/server.py +311 -0
  29. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  30. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  31. mcp_vector_search/cli/commands/visualize/templates/base.py +180 -0
  32. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2507 -0
  33. mcp_vector_search/cli/commands/visualize/templates/styles.py +1313 -0
  34. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  35. mcp_vector_search/cli/didyoumean.py +22 -2
  36. mcp_vector_search/cli/main.py +115 -159
  37. mcp_vector_search/cli/output.py +24 -8
  38. mcp_vector_search/config/__init__.py +4 -0
  39. mcp_vector_search/config/default_thresholds.yaml +52 -0
  40. mcp_vector_search/config/settings.py +12 -0
  41. mcp_vector_search/config/thresholds.py +185 -0
  42. mcp_vector_search/core/auto_indexer.py +3 -3
  43. mcp_vector_search/core/boilerplate.py +186 -0
  44. mcp_vector_search/core/config_utils.py +394 -0
  45. mcp_vector_search/core/database.py +369 -94
  46. mcp_vector_search/core/exceptions.py +11 -0
  47. mcp_vector_search/core/git_hooks.py +4 -4
  48. mcp_vector_search/core/indexer.py +221 -4
  49. mcp_vector_search/core/llm_client.py +751 -0
  50. mcp_vector_search/core/models.py +3 -0
  51. mcp_vector_search/core/project.py +17 -0
  52. mcp_vector_search/core/scheduler.py +11 -11
  53. mcp_vector_search/core/search.py +179 -29
  54. mcp_vector_search/mcp/server.py +24 -5
  55. mcp_vector_search/utils/__init__.py +2 -0
  56. mcp_vector_search/utils/gitignore_updater.py +212 -0
  57. mcp_vector_search/utils/monorepo.py +66 -4
  58. mcp_vector_search/utils/timing.py +10 -6
  59. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/METADATA +182 -52
  60. mcp_vector_search-1.0.3.dist-info/RECORD +97 -0
  61. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/WHEEL +1 -1
  62. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/entry_points.txt +1 -0
  63. mcp_vector_search/cli/commands/visualize.py +0 -1467
  64. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  65. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/licenses/LICENSE +0 -0
@@ -147,6 +147,9 @@ class SearchResult(BaseModel):
147
147
  context_before: list[str] = Field(default=[], description="Lines before the match")
148
148
  context_after: list[str] = Field(default=[], description="Lines after the match")
149
149
  highlights: list[str] = Field(default=[], description="Highlighted terms")
150
+ file_missing: bool = Field(
151
+ default=False, description="True if file no longer exists (stale index)"
152
+ )
150
153
 
151
154
  class Config:
152
155
  arbitrary_types_allowed = True
@@ -107,6 +107,23 @@ class ProjectManager:
107
107
  index_path = get_default_index_path(self.project_root)
108
108
  index_path.mkdir(parents=True, exist_ok=True)
109
109
 
110
+ # Ensure .mcp-vector-search/ is in .gitignore
111
+ # This is a non-critical operation - failures are logged but don't block initialization
112
+ try:
113
+ from ..utils.gitignore_updater import ensure_gitignore_entry
114
+
115
+ ensure_gitignore_entry(
116
+ self.project_root,
117
+ pattern=".mcp-vector-search/",
118
+ comment="MCP Vector Search index directory",
119
+ )
120
+ except Exception as e:
121
+ # Log warning but continue initialization
122
+ logger.warning(f"Could not update .gitignore: {e}")
123
+ logger.info(
124
+ "Please manually add '.mcp-vector-search/' to your .gitignore file"
125
+ )
126
+
110
127
  # Detect languages and files
111
128
  detected_languages = self.detect_languages()
112
129
  file_count = self.count_indexable_files(
@@ -73,7 +73,7 @@ class SchedulerManager:
73
73
  project_root = str(self.project_root)
74
74
 
75
75
  # Create wrapper script
76
- script_content = f'''#!/bin/bash
76
+ script_content = f"""#!/bin/bash
77
77
  # MCP Vector Search Auto-Reindex - {task_name}
78
78
  cd "{project_root}" || exit 1
79
79
 
@@ -85,7 +85,7 @@ elif [ -f "{python_path}" ]; then
85
85
  else
86
86
  python3 -m mcp_vector_search auto-index check --auto-reindex --max-files 10
87
87
  fi
88
- '''
88
+ """
89
89
 
90
90
  # Write script to temp file
91
91
  script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -109,7 +109,7 @@ fi
109
109
 
110
110
  # Get current crontab
111
111
  try:
112
- result = subprocess.run(
112
+ result = subprocess.run( # nosec B607
113
113
  ["crontab", "-l"], capture_output=True, text=True, check=True
114
114
  )
115
115
  current_crontab = result.stdout
@@ -125,7 +125,7 @@ fi
125
125
  new_crontab = current_crontab + cron_entry
126
126
 
127
127
  # Install new crontab
128
- process = subprocess.Popen(
128
+ process = subprocess.Popen( # nosec B607
129
129
  ["crontab", "-"], stdin=subprocess.PIPE, text=True
130
130
  )
131
131
  process.communicate(input=new_crontab)
@@ -148,7 +148,7 @@ fi
148
148
  try:
149
149
  # Get current crontab
150
150
  try:
151
- result = subprocess.run(
151
+ result = subprocess.run( # nosec B607
152
152
  ["crontab", "-l"], capture_output=True, text=True, check=True
153
153
  )
154
154
  current_crontab = result.stdout
@@ -163,13 +163,13 @@ fi
163
163
 
164
164
  # Install new crontab
165
165
  if new_crontab.strip():
166
- process = subprocess.Popen(
166
+ process = subprocess.Popen( # nosec B607
167
167
  ["crontab", "-"], stdin=subprocess.PIPE, text=True
168
168
  )
169
169
  process.communicate(input=new_crontab)
170
170
  else:
171
171
  # Remove crontab entirely if empty
172
- subprocess.run(["crontab", "-r"], check=False)
172
+ subprocess.run(["crontab", "-r"], check=False) # nosec B607
173
173
 
174
174
  # Remove script file
175
175
  script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -191,7 +191,7 @@ fi
191
191
  project_root = str(self.project_root)
192
192
 
193
193
  # Create PowerShell script
194
- script_content = f'''# MCP Vector Search Auto-Reindex - {task_name}
194
+ script_content = f"""# MCP Vector Search Auto-Reindex - {task_name}
195
195
  Set-Location "{project_root}"
196
196
 
197
197
  try {{
@@ -205,7 +205,7 @@ try {{
205
205
  }} catch {{
206
206
  # Silently ignore errors
207
207
  }}
208
- '''
208
+ """
209
209
 
210
210
  # Write script
211
211
  script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -302,7 +302,7 @@ try {{
302
302
  def _get_cron_status(self, task_name: str) -> dict:
303
303
  """Get cron job status."""
304
304
  try:
305
- result = subprocess.run(
305
+ result = subprocess.run( # nosec B607
306
306
  ["crontab", "-l"], capture_output=True, text=True, check=True
307
307
  )
308
308
 
@@ -315,7 +315,7 @@ try {{
315
315
  def _get_windows_task_status(self, task_name: str) -> dict:
316
316
  """Get Windows task status."""
317
317
  try:
318
- result = subprocess.run(
318
+ result = subprocess.run( # nosec B607
319
319
  ["schtasks", "/query", "/tn", task_name], capture_output=True, text=True
320
320
  )
321
321
 
@@ -1,5 +1,6 @@
1
1
  """Semantic search engine for MCP Vector Search."""
2
2
 
3
+ import asyncio
3
4
  import re
4
5
  import time
5
6
  from collections import OrderedDict
@@ -11,8 +12,9 @@ from loguru import logger
11
12
 
12
13
  from ..config.constants import DEFAULT_CACHE_SIZE
13
14
  from .auto_indexer import AutoIndexer, SearchTriggeredIndexer
15
+ from .boilerplate import BoilerplateFilter
14
16
  from .database import VectorDatabase
15
- from .exceptions import SearchError
17
+ from .exceptions import RustPanicError, SearchError
16
18
  from .models import SearchResult
17
19
 
18
20
 
@@ -67,6 +69,7 @@ class SemanticSearchEngine:
67
69
  _BOOST_SHALLOW_PATH = 0.02
68
70
  _PENALTY_TEST_FILE = -0.02
69
71
  _PENALTY_DEEP_PATH = -0.01
72
+ _PENALTY_BOILERPLATE = -0.15
70
73
 
71
74
  def __init__(
72
75
  self,
@@ -106,6 +109,156 @@ class SemanticSearchEngine:
106
109
  self._last_health_check: float = 0.0
107
110
  self._health_check_interval: float = 60.0
108
111
 
112
+ # Boilerplate filter for smart result ranking
113
+ self._boilerplate_filter = BoilerplateFilter()
114
+
115
+ @staticmethod
116
+ def _is_rust_panic_error(error: Exception) -> bool:
117
+ """Detect ChromaDB Rust panic errors.
118
+
119
+ Args:
120
+ error: Exception to check
121
+
122
+ Returns:
123
+ True if this is a Rust panic error
124
+ """
125
+ error_msg = str(error).lower()
126
+
127
+ # Check for the specific Rust panic pattern
128
+ # "range start index X out of range for slice of length Y"
129
+ if "range start index" in error_msg and "out of range" in error_msg:
130
+ return True
131
+
132
+ # Check for other Rust panic indicators
133
+ rust_panic_patterns = [
134
+ "rust panic",
135
+ "pyo3_runtime.panicexception",
136
+ "thread 'tokio-runtime-worker' panicked",
137
+ "rust/sqlite/src/db.rs", # Specific to the known ChromaDB issue
138
+ ]
139
+
140
+ return any(pattern in error_msg for pattern in rust_panic_patterns)
141
+
142
+ @staticmethod
143
+ def _is_corruption_error(error: Exception) -> bool:
144
+ """Detect index corruption errors.
145
+
146
+ Args:
147
+ error: Exception to check
148
+
149
+ Returns:
150
+ True if this is a corruption error
151
+ """
152
+ error_msg = str(error).lower()
153
+
154
+ corruption_indicators = [
155
+ "pickle",
156
+ "unpickling",
157
+ "eof",
158
+ "ran out of input",
159
+ "hnsw",
160
+ "deserialize",
161
+ "corrupt",
162
+ ]
163
+
164
+ return any(indicator in error_msg for indicator in corruption_indicators)
165
+
166
+ async def _search_with_retry(
167
+ self,
168
+ query: str,
169
+ limit: int,
170
+ filters: dict[str, Any] | None,
171
+ threshold: float,
172
+ max_retries: int = 3,
173
+ ) -> list[SearchResult]:
174
+ """Execute search with retry logic and exponential backoff.
175
+
176
+ Args:
177
+ query: Processed search query
178
+ limit: Maximum number of results
179
+ filters: Optional filters
180
+ threshold: Similarity threshold
181
+ max_retries: Maximum retry attempts (default: 3)
182
+
183
+ Returns:
184
+ List of search results
185
+
186
+ Raises:
187
+ RustPanicError: If Rust panic persists after retries
188
+ SearchError: If search fails for other reasons
189
+ """
190
+ last_error = None
191
+ backoff_delays = [0, 0.1, 0.5] # Immediate, 100ms, 500ms
192
+
193
+ for attempt in range(max_retries):
194
+ try:
195
+ # Add delay for retries (exponential backoff)
196
+ if attempt > 0 and backoff_delays[attempt] > 0:
197
+ await asyncio.sleep(backoff_delays[attempt])
198
+ logger.debug(
199
+ f"Retrying search after {backoff_delays[attempt]}s delay (attempt {attempt + 1}/{max_retries})"
200
+ )
201
+
202
+ # Perform the actual search
203
+ results = await self.database.search(
204
+ query=query,
205
+ limit=limit,
206
+ filters=filters,
207
+ similarity_threshold=threshold,
208
+ )
209
+
210
+ # Success! If we had retries, log that we recovered
211
+ if attempt > 0:
212
+ logger.info(
213
+ f"Search succeeded after {attempt + 1} attempts (recovered from transient error)"
214
+ )
215
+
216
+ return results
217
+
218
+ except BaseException as e:
219
+ # Re-raise system exceptions we should never catch
220
+ if isinstance(e, (KeyboardInterrupt, SystemExit, GeneratorExit)):
221
+ raise
222
+
223
+ last_error = e
224
+
225
+ # Check if this is a Rust panic
226
+ if self._is_rust_panic_error(e):
227
+ logger.warning(
228
+ f"ChromaDB Rust panic detected (attempt {attempt + 1}/{max_retries}): {e}"
229
+ )
230
+
231
+ # If this is the last retry, escalate to corruption recovery
232
+ if attempt == max_retries - 1:
233
+ logger.error(
234
+ "Rust panic persisted after all retries - index may be corrupted"
235
+ )
236
+ raise RustPanicError(
237
+ "ChromaDB Rust panic detected. The HNSW index may be corrupted. "
238
+ "Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
239
+ ) from e
240
+
241
+ # Otherwise, continue to next retry
242
+ continue
243
+
244
+ # Check for general corruption
245
+ elif self._is_corruption_error(e):
246
+ logger.error(f"Index corruption detected: {e}")
247
+ raise SearchError(
248
+ "Index corruption detected. Please run 'mcp-vector-search reset' "
249
+ "followed by 'mcp-vector-search index' to rebuild."
250
+ ) from e
251
+
252
+ # Some other error - don't retry, just fail
253
+ else:
254
+ logger.error(f"Search failed: {e}")
255
+ raise SearchError(f"Search failed: {e}") from e
256
+
257
+ # Should never reach here, but just in case
258
+ raise SearchError(
259
+ f"Search failed after {max_retries} retries: {last_error}"
260
+ ) from last_error
261
+
109
262
  async def search(
110
263
  self,
111
264
  query: str,
@@ -162,12 +315,12 @@ class SemanticSearchEngine:
162
315
  # Preprocess query
163
316
  processed_query = self._preprocess_query(query)
164
317
 
165
- # Perform vector search
166
- results = await self.database.search(
318
+ # Perform vector search with retry logic
319
+ results = await self._search_with_retry(
167
320
  query=processed_query,
168
321
  limit=limit,
169
322
  filters=filters,
170
- similarity_threshold=threshold,
323
+ threshold=threshold,
171
324
  )
172
325
 
173
326
  # Post-process results
@@ -184,32 +337,13 @@ class SemanticSearchEngine:
184
337
  )
185
338
  return ranked_results
186
339
 
340
+ except (RustPanicError, SearchError):
341
+ # These errors are already properly formatted with user guidance
342
+ raise
187
343
  except Exception as e:
188
- error_msg = str(e).lower()
189
- # Check for corruption indicators
190
- if any(
191
- indicator in error_msg
192
- for indicator in [
193
- "pickle",
194
- "unpickling",
195
- "eof",
196
- "ran out of input",
197
- "hnsw",
198
- "index",
199
- "deserialize",
200
- "corrupt",
201
- ]
202
- ):
203
- logger.error(f"Index corruption detected during search: {e}")
204
- logger.info(
205
- "The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it."
206
- )
207
- raise SearchError(
208
- "Index corruption detected. Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
209
- ) from e
210
- else:
211
- logger.error(f"Search failed for query '{query}': {e}")
212
- raise SearchError(f"Search failed: {e}") from e
344
+ # Unexpected error - wrap it in SearchError
345
+ logger.error(f"Unexpected search error for query '{query}': {e}")
346
+ raise SearchError(f"Search failed: {e}") from e
213
347
 
214
348
  async def search_similar(
215
349
  self,
@@ -470,6 +604,11 @@ class SemanticSearchEngine:
470
604
  result.context_before = context_before
471
605
  result.context_after = context_after
472
606
 
607
+ except FileNotFoundError:
608
+ # File was deleted since indexing - silently skip context
609
+ # This is normal when index is stale; use --force to reindex
610
+ logger.debug(f"File no longer exists (stale index): {result.file_path}")
611
+ result.file_missing = True # Mark for potential filtering
473
612
  except Exception as e:
474
613
  logger.warning(f"Failed to get context for {result.file_path}: {e}")
475
614
 
@@ -562,6 +701,17 @@ class SemanticSearchEngine:
562
701
  elif path_depth > 5:
563
702
  score += self._PENALTY_DEEP_PATH
564
703
 
704
+ # Factor 7: Boilerplate penalty (penalize common boilerplate patterns)
705
+ # Apply penalty to function names (constructors, lifecycle methods, etc.)
706
+ if result.function_name:
707
+ boilerplate_penalty = self._boilerplate_filter.get_penalty(
708
+ name=result.function_name,
709
+ language=result.language,
710
+ query=query,
711
+ penalty=self._PENALTY_BOILERPLATE,
712
+ )
713
+ score += boilerplate_penalty
714
+
565
715
  # Ensure score doesn't exceed 1.0
566
716
  result.similarity_score = min(1.0, score)
567
717
 
@@ -38,11 +38,28 @@ class MCPVectorSearchServer:
38
38
  """Initialize the MCP server.
39
39
 
40
40
  Args:
41
- project_root: Project root directory. If None, will auto-detect.
41
+ project_root: Project root directory. If None, will auto-detect from:
42
+ 1. PROJECT_ROOT or MCP_PROJECT_ROOT environment variable
43
+ 2. Current working directory
42
44
  enable_file_watching: Enable file watching for automatic reindexing.
43
45
  If None, checks MCP_ENABLE_FILE_WATCHING env var (default: True).
44
46
  """
45
- self.project_root = project_root or Path.cwd()
47
+ # Auto-detect project root from environment or current directory
48
+ if project_root is None:
49
+ # Priority 1: MCP_PROJECT_ROOT (new standard)
50
+ # Priority 2: PROJECT_ROOT (legacy)
51
+ # Priority 3: Current working directory
52
+ env_project_root = os.getenv("MCP_PROJECT_ROOT") or os.getenv(
53
+ "PROJECT_ROOT"
54
+ )
55
+ if env_project_root:
56
+ project_root = Path(env_project_root).resolve()
57
+ logger.info(f"Using project root from environment: {project_root}")
58
+ else:
59
+ project_root = Path.cwd()
60
+ logger.info(f"Using current directory as project root: {project_root}")
61
+
62
+ self.project_root = project_root
46
63
  self.project_manager = ProjectManager(self.project_root)
47
64
  self.search_engine: SemanticSearchEngine | None = None
48
65
  self.file_watcher: FileWatcher | None = None
@@ -397,9 +414,11 @@ class MCPVectorSearchServer:
397
414
  "languages": config.languages,
398
415
  "total_chunks": stats.total_chunks,
399
416
  "total_files": stats.total_files,
400
- "index_size": f"{stats.index_size_mb:.2f} MB"
401
- if hasattr(stats, "index_size_mb")
402
- else "Unknown",
417
+ "index_size": (
418
+ f"{stats.index_size_mb:.2f} MB"
419
+ if hasattr(stats, "index_size_mb")
420
+ else "Unknown"
421
+ ),
403
422
  }
404
423
  else:
405
424
  status_info = {
@@ -6,6 +6,7 @@ from .gitignore import (
6
6
  create_gitignore_parser,
7
7
  is_path_gitignored,
8
8
  )
9
+ from .gitignore_updater import ensure_gitignore_entry
9
10
  from .timing import (
10
11
  PerformanceProfiler,
11
12
  SearchProfiler,
@@ -24,6 +25,7 @@ __all__ = [
24
25
  "GitignorePattern",
25
26
  "create_gitignore_parser",
26
27
  "is_path_gitignored",
28
+ "ensure_gitignore_entry",
27
29
  # Timing utilities
28
30
  "PerformanceProfiler",
29
31
  "TimingResult",
@@ -0,0 +1,212 @@
1
+ """Gitignore file update utilities for automatic .gitignore entry management."""
2
+
3
+ from pathlib import Path
4
+
5
+ from loguru import logger
6
+
7
+
8
+ def ensure_gitignore_entry(
9
+ project_root: Path,
10
+ pattern: str = ".mcp-vector-search/",
11
+ comment: str | None = "MCP Vector Search index directory",
12
+ create_if_missing: bool = True,
13
+ ) -> bool:
14
+ """Ensure a pattern exists in .gitignore file.
15
+
16
+ This function safely adds a pattern to .gitignore if it doesn't already exist.
17
+ It handles various edge cases including:
18
+ - Non-existent .gitignore files (creates if in git repo)
19
+ - Empty .gitignore files
20
+ - Existing patterns in various formats
21
+ - Negation patterns (conflict detection)
22
+ - Permission errors
23
+ - Encoding issues
24
+
25
+ Design Decision: Non-Blocking Operation
26
+ ----------------------------------------
27
+ This function is designed to be non-critical and non-blocking. It will:
28
+ - NEVER raise exceptions (returns False on errors)
29
+ - Log warnings for failures instead of blocking
30
+ - Allow project initialization to continue even if gitignore update fails
31
+
32
+ Rationale: .gitignore updates are a quality-of-life improvement, not a
33
+ requirement for mcp-vector-search functionality. Users can manually add
34
+ the entry if automatic update fails.
35
+
36
+ Pattern Detection Strategy
37
+ --------------------------
38
+ The function checks for semantic equivalents of the pattern:
39
+ - `.mcp-vector-search/` (exact match)
40
+ - `.mcp-vector-search` (without trailing slash)
41
+ - `.mcp-vector-search/*` (with wildcard)
42
+ - `/.mcp-vector-search/` (root-relative)
43
+
44
+ All are treated as equivalent to avoid duplicate entries.
45
+
46
+ Edge Cases Handled
47
+ ------------------
48
+ 1. .gitignore does not exist -> Create (if in git repo)
49
+ 2. .gitignore is empty -> Add pattern
50
+ 3. Pattern already exists -> Skip (log debug)
51
+ 4. Similar pattern exists -> Skip (log debug)
52
+ 5. Negation pattern exists -> Warn and skip (respects user intent)
53
+ 6. Not a git repository -> Skip (no .gitignore needed)
54
+ 7. Permission denied -> Warn and skip (log manual instructions)
55
+ 8. Encoding errors -> Try fallback encoding
56
+ 9. Missing parent directory -> Should not occur (project_root exists)
57
+ 10. Concurrent modification -> Safe (append operation is atomic-ish)
58
+
59
+ Args:
60
+ project_root: Project root directory (must exist)
61
+ pattern: Pattern to add to .gitignore (default: .mcp-vector-search/)
62
+ comment: Optional comment to add before the pattern
63
+ create_if_missing: Create .gitignore if it doesn't exist (default: True)
64
+
65
+ Returns:
66
+ True if pattern was added or already exists, False on error
67
+
68
+ Performance:
69
+ - Time Complexity: O(n) where n = lines in .gitignore (typically <1000)
70
+ - Space Complexity: O(n) for reading file into memory
71
+ - Expected Runtime: <10ms for typical .gitignore files
72
+
73
+ Notes:
74
+ - Only creates .gitignore in git repositories (checks for .git directory)
75
+ - Preserves existing file structure and encoding (UTF-8)
76
+ - Handles negation patterns gracefully (warns but doesn't override)
77
+ - Non-blocking: logs warnings instead of raising exceptions
78
+
79
+ Examples:
80
+ >>> # Basic usage during project initialization
81
+ >>> ensure_gitignore_entry(Path("/path/to/project"))
82
+ True
83
+
84
+ >>> # Custom pattern with custom comment
85
+ >>> ensure_gitignore_entry(
86
+ ... Path("/path/to/project"),
87
+ ... pattern=".custom-dir/",
88
+ ... comment="Custom tool directory"
89
+ ... )
90
+ True
91
+
92
+ >>> # Don't create .gitignore if missing
93
+ >>> ensure_gitignore_entry(
94
+ ... Path("/path/to/project"),
95
+ ... create_if_missing=False
96
+ ... )
97
+ False
98
+ """
99
+ gitignore_path = project_root / ".gitignore"
100
+
101
+ # Edge Case 1: Check if this is a git repository
102
+ # Only create/modify .gitignore in git repositories to avoid polluting non-git projects
103
+ git_dir = project_root / ".git"
104
+ if not git_dir.exists():
105
+ logger.debug(
106
+ "Not a git repository (no .git directory), skipping .gitignore update"
107
+ )
108
+ return False
109
+
110
+ try:
111
+ # Edge Case 2: Handle non-existent .gitignore
112
+ if not gitignore_path.exists():
113
+ if not create_if_missing:
114
+ logger.debug(".gitignore does not exist and create_if_missing=False")
115
+ return False
116
+
117
+ # Create new .gitignore with the pattern
118
+ content = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"
119
+ gitignore_path.write_text(content, encoding="utf-8")
120
+ logger.info(f"Created .gitignore with {pattern} entry")
121
+ return True
122
+
123
+ # Read existing content with UTF-8 encoding
124
+ try:
125
+ content = gitignore_path.read_text(encoding="utf-8")
126
+ except UnicodeDecodeError:
127
+ # Edge Case 8: Fallback to more lenient encoding
128
+ logger.debug("UTF-8 decode failed, trying with error replacement")
129
+ try:
130
+ content = gitignore_path.read_text(encoding="utf-8", errors="replace")
131
+ except Exception as e:
132
+ logger.warning(
133
+ f"Failed to read .gitignore due to encoding error: {e}. "
134
+ f"Please manually add '{pattern}' to your .gitignore"
135
+ )
136
+ return False
137
+
138
+ # Edge Case 3: Handle empty .gitignore
139
+ stripped_content = content.strip()
140
+ if not stripped_content:
141
+ content = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"
142
+ gitignore_path.write_text(content, encoding="utf-8")
143
+ logger.info(f"Added {pattern} to empty .gitignore")
144
+ return True
145
+
146
+ # Check for existing patterns (Edge Cases 4, 5, 6)
147
+ lines = content.split("\n")
148
+ normalized_pattern = pattern.rstrip("/").lstrip("/")
149
+
150
+ for line in lines:
151
+ # Skip comments and empty lines
152
+ stripped_line = line.strip()
153
+ if not stripped_line or stripped_line.startswith("#"):
154
+ continue
155
+
156
+ # Edge Case 6: Check for negation pattern (conflict)
157
+ # Negation patterns indicate explicit user intent to track the directory
158
+ if stripped_line.startswith("!") and normalized_pattern in stripped_line:
159
+ logger.warning(
160
+ f".gitignore contains negation pattern: {stripped_line}. "
161
+ "This indicates you want to track .mcp-vector-search/ in git. "
162
+ "Skipping automatic entry to respect your configuration."
163
+ )
164
+ return False
165
+
166
+ # Normalize line for comparison
167
+ normalized_line = stripped_line.rstrip("/").lstrip("/")
168
+
169
+ # Edge Cases 4 & 5: Check for exact or similar matches
170
+ # These patterns are semantically equivalent for .gitignore:
171
+ # - .mcp-vector-search/
172
+ # - .mcp-vector-search
173
+ # - .mcp-vector-search/*
174
+ # - /.mcp-vector-search/
175
+ if (
176
+ normalized_line == normalized_pattern
177
+ or normalized_line == normalized_pattern + "/*"
178
+ ):
179
+ logger.debug(f"Pattern already exists in .gitignore: {stripped_line}")
180
+ return True
181
+
182
+ # Pattern doesn't exist, add it
183
+ # Preserve file structure: ensure proper newline handling
184
+ if not content.endswith("\n"):
185
+ content += "\n"
186
+
187
+ # Add blank line before comment for visual separation
188
+ content += "\n"
189
+
190
+ if comment:
191
+ content += f"# {comment}\n"
192
+ content += f"{pattern}\n"
193
+
194
+ # Write back to file
195
+ gitignore_path.write_text(content, encoding="utf-8")
196
+ logger.info(f"Added {pattern} to .gitignore")
197
+ return True
198
+
199
+ except PermissionError:
200
+ # Edge Case 7: Handle read-only .gitignore or protected directory
201
+ logger.warning(
202
+ f"Cannot update .gitignore: Permission denied. "
203
+ f"Please manually add '{pattern}' to your .gitignore file at {gitignore_path}"
204
+ )
205
+ return False
206
+ except Exception as e:
207
+ # Catch-all for unexpected errors (don't block initialization)
208
+ logger.warning(
209
+ f"Failed to update .gitignore: {e}. "
210
+ f"Please manually add '{pattern}' to your .gitignore"
211
+ )
212
+ return False