mcp-vector-search 0.15.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,250 @@
1
+ """Gitignore parsing and matching utilities."""
2
+
3
+ import fnmatch
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from loguru import logger
8
+
9
+
10
class GitignorePattern:
    """A single ``.gitignore`` rule together with its matching logic.

    Matching rules implemented here:

    * A pattern with a ``/`` at its start or in its middle is anchored to the
      repository root; a slash-free pattern may match at any depth.
    * A directory-only pattern (trailing ``/``) matches a directory and
      everything beneath it, but never a plain file of the same name.
    * ``**/`` matches zero or more leading directories.
    * A single ``*`` is evaluated with :func:`fnmatch.fnmatch`, which does not
      treat ``/`` specially, so it may span directory separators.
    """

    def __init__(
        self, pattern: str, is_negation: bool = False, is_directory_only: bool = False
    ):
        """Initialize a gitignore pattern.

        Args:
            pattern: The pattern string exactly as written in the file
            is_negation: Whether this is a negation pattern (starts with !)
            is_directory_only: Whether this pattern only matches directories
                (ends with /)
        """
        self.original_pattern = pattern
        self.is_negation = is_negation
        self.is_directory_only = is_directory_only
        self.pattern = self._normalize_pattern(pattern)
        # Anchoring must be derived from the raw text: normalisation strips
        # the leading "/" that signals it.
        self.is_anchored = self._has_anchor(pattern)

    def _normalize_pattern(self, pattern: str) -> str:
        """Strip the ``!`` prefix, one trailing ``/`` and one leading ``/``."""
        # Remove leading ! for negation patterns
        if pattern.startswith("!"):
            pattern = pattern[1:]

        # Remove trailing / for directory-only patterns
        if pattern.endswith("/"):
            pattern = pattern[:-1]

        # Remove leading slash; anchoring is tracked in ``is_anchored``
        if pattern.startswith("/"):
            pattern = pattern[1:]

        return pattern

    @staticmethod
    def _has_anchor(pattern: str) -> bool:
        """Return True if the raw pattern has a leading or middle ``/``."""
        body = pattern[1:] if pattern.startswith("!") else pattern
        if body.endswith("/"):
            # A trailing slash only marks "directory", it does not anchor.
            body = body[:-1]
        # Backslashes are treated as separators, consistent with matches().
        return "/" in body.replace("\\", "/")

    @staticmethod
    def _double_star_regex(pattern: str) -> str:
        """Translate a pattern containing ``**`` into a regular expression.

        The pattern is tokenised in a single pass so that text emitted for one
        wildcard is never rewritten by the handling of another, and every
        literal character is escaped. Bracket expressions are treated as
        literal text here; they are still honoured by the ``fnmatch`` attempt
        that precedes this fallback.
        """
        out: list[str] = []
        i = 0
        length = len(pattern)
        while i < length:
            at_segment_start = i == 0 or pattern[i - 1] == "/"
            if at_segment_start and pattern.startswith("**/", i):
                # Stand-alone "**/" may also match no directory at all.
                out.append("(?:.*/)?")
                i += 3
            elif pattern.startswith("**", i):
                out.append(".*")
                i += 2
            elif pattern[i] == "*":
                out.append("[^/]*")
                i += 1
            elif pattern[i] == "?":
                out.append("[^/]")
                i += 1
            else:
                out.append(re.escape(pattern[i]))
                i += 1
        return "".join(out)

    def _glob_match(self, candidate: str, pattern: str) -> bool:
        """Match one candidate string against the normalised pattern."""
        if fnmatch.fnmatch(candidate, pattern):
            return True
        if "**" not in pattern:
            return False
        # fnmatch cannot express "zero directories" for "**/"; fall back to
        # the dedicated translation for that case.
        regex = self._double_star_regex(pattern)
        return re.fullmatch(regex, candidate, flags=re.DOTALL) is not None

    def matches(self, path: str, is_directory: bool = False) -> bool:
        """Check if this pattern matches the given path.

        Args:
            path: Relative path from repository root
            is_directory: Whether the path is a directory

        Returns:
            True if the pattern matches
        """
        # Convert path separators for consistent matching
        path = path.replace("\\", "/")
        pattern = self.pattern.replace("\\", "/")
        parts = path.split("/")

        if self.is_directory_only:
            # Only real directories may satisfy a "dir/" pattern: every
            # ancestor, plus the final component when the path is a directory.
            dir_parts = parts if is_directory else parts[:-1]
            if self.is_anchored:
                candidates = (
                    "/".join(dir_parts[:end]) for end in range(1, len(dir_parts) + 1)
                )
            else:
                # Slash-free pattern: compare against each directory name.
                candidates = iter(dir_parts)
        elif self.is_anchored:
            candidates = iter((path,))
        else:
            # Floating pattern: try the full path and every trailing sub-path.
            candidates = ("/".join(parts[start:]) for start in range(len(parts)))

        return any(self._glob_match(candidate, pattern) for candidate in candidates)
99
+
100
+
101
+ class GitignoreParser:
102
+ """Parser for .gitignore files with proper pattern matching."""
103
+
104
+ def __init__(self, project_root: Path):
105
+ """Initialize gitignore parser.
106
+
107
+ Args:
108
+ project_root: Root directory of the project
109
+ """
110
+ self.project_root = project_root
111
+ self.patterns: list[GitignorePattern] = []
112
+ self._load_gitignore_files()
113
+
114
+ def _load_gitignore_files(self) -> None:
115
+ """Load .gitignore file from project root only.
116
+
117
+ Note: Only the root .gitignore is loaded to avoid performance issues
118
+ with rglob traversing large directory trees (e.g., node_modules with
119
+ 250K+ files). Subdirectory .gitignore files are intentionally skipped
120
+ as they would add significant overhead without much benefit for
121
+ semantic code search indexing.
122
+ """
123
+ # Load root .gitignore only
124
+ root_gitignore = self.project_root / ".gitignore"
125
+ if root_gitignore.exists():
126
+ self._parse_gitignore_file(root_gitignore)
127
+
128
+ def _parse_gitignore_file(self, gitignore_path: Path) -> None:
129
+ """Parse a single .gitignore file.
130
+
131
+ Args:
132
+ gitignore_path: Path to the .gitignore file
133
+ """
134
+ try:
135
+ with open(gitignore_path, encoding="utf-8", errors="ignore") as f:
136
+ lines = f.readlines()
137
+
138
+ for _line_num, line in enumerate(lines, 1):
139
+ line = line.strip()
140
+
141
+ # Skip empty lines and comments
142
+ if not line or line.startswith("#"):
143
+ continue
144
+
145
+ # Check for negation pattern
146
+ is_negation = line.startswith("!")
147
+
148
+ # Check for directory-only pattern
149
+ is_directory_only = line.endswith("/")
150
+
151
+ # Create pattern (all patterns are from root .gitignore)
152
+ pattern = GitignorePattern(line, is_negation, is_directory_only)
153
+ self.patterns.append(pattern)
154
+
155
+ except Exception as e:
156
+ logger.warning(f"Failed to parse {gitignore_path}: {e}")
157
+
158
+ def is_ignored(self, path: Path, is_directory: bool | None = None) -> bool:
159
+ """Check if a path should be ignored according to .gitignore rules.
160
+
161
+ Args:
162
+ path: Path to check (can be absolute or relative to project root)
163
+ is_directory: Optional hint if path is a directory.
164
+ If None, will check filesystem (slower).
165
+ If provided, skips filesystem check (faster).
166
+
167
+ Returns:
168
+ True if the path should be ignored
169
+ """
170
+ try:
171
+ # SHORT-CIRCUIT: If no patterns, nothing is ignored
172
+ # This prevents 200k+ unnecessary filesystem stat() calls on projects
173
+ # without .gitignore files
174
+ if not self.patterns:
175
+ return False
176
+
177
+ # Convert to relative path from project root
178
+ if path.is_absolute():
179
+ relative_path = path.relative_to(self.project_root)
180
+ else:
181
+ relative_path = path
182
+
183
+ path_str = str(relative_path).replace("\\", "/")
184
+
185
+ # Only check if directory when needed and not provided as hint
186
+ # PERFORMANCE: Passing is_directory hint from caller (e.g., os.walk)
187
+ # avoids hundreds of thousands of stat() calls on large repositories
188
+ if is_directory is None:
189
+ is_directory = path.is_dir() if path.exists() else False
190
+
191
+ # Apply patterns in order, with later patterns overriding earlier ones
192
+ ignored = False
193
+
194
+ for pattern in self.patterns:
195
+ if pattern.matches(path_str, is_directory):
196
+ ignored = not pattern.is_negation
197
+
198
+ return ignored
199
+
200
+ except ValueError:
201
+ # Path is not relative to project root
202
+ return False
203
+ except Exception as e:
204
+ logger.debug(f"Error checking gitignore for {path}: {e}")
205
+ return False
206
+
207
+ def get_ignored_patterns(self) -> list[str]:
208
+ """Get list of all ignore patterns.
209
+
210
+ Returns:
211
+ List of pattern strings
212
+ """
213
+ return [p.original_pattern for p in self.patterns if not p.is_negation]
214
+
215
+ def get_negation_patterns(self) -> list[str]:
216
+ """Get list of all negation patterns.
217
+
218
+ Returns:
219
+ List of negation pattern strings
220
+ """
221
+ return [p.original_pattern for p in self.patterns if p.is_negation]
222
+
223
+
224
def create_gitignore_parser(project_root: Path) -> GitignoreParser:
    """Build a ``GitignoreParser`` rooted at ``project_root``.

    Args:
        project_root: Root directory of the project

    Returns:
        A newly constructed GitignoreParser for that root
    """
    parser = GitignoreParser(project_root)
    return parser
234
+
235
+
236
def is_path_gitignored(
    path: Path, project_root: Path, is_directory: bool | None = None
) -> bool:
    """One-shot check of a single path against the project's gitignore rules.

    A fresh parser is created on every call, so callers checking many paths
    should build one parser with ``create_gitignore_parser`` and reuse it.

    Args:
        path: Path to check
        project_root: Root directory of the project
        is_directory: Optional hint if path is a directory (avoids filesystem check)

    Returns:
        True if the path should be ignored
    """
    return create_gitignore_parser(project_root).is_ignored(
        path, is_directory=is_directory
    )
@@ -0,0 +1,212 @@
1
+ """Gitignore file update utilities for automatic .gitignore entry management."""
2
+
3
+ from pathlib import Path
4
+
5
+ from loguru import logger
6
+
7
+
8
+ def ensure_gitignore_entry(
9
+ project_root: Path,
10
+ pattern: str = ".mcp-vector-search/",
11
+ comment: str | None = "MCP Vector Search index directory",
12
+ create_if_missing: bool = True,
13
+ ) -> bool:
14
+ """Ensure a pattern exists in .gitignore file.
15
+
16
+ This function safely adds a pattern to .gitignore if it doesn't already exist.
17
+ It handles various edge cases including:
18
+ - Non-existent .gitignore files (creates if in git repo)
19
+ - Empty .gitignore files
20
+ - Existing patterns in various formats
21
+ - Negation patterns (conflict detection)
22
+ - Permission errors
23
+ - Encoding issues
24
+
25
+ Design Decision: Non-Blocking Operation
26
+ ----------------------------------------
27
+ This function is designed to be non-critical and non-blocking. It will:
28
+ - NEVER raise exceptions (returns False on errors)
29
+ - Log warnings for failures instead of blocking
30
+ - Allow project initialization to continue even if gitignore update fails
31
+
32
+ Rationale: .gitignore updates are a quality-of-life improvement, not a
33
+ requirement for mcp-vector-search functionality. Users can manually add
34
+ the entry if automatic update fails.
35
+
36
+ Pattern Detection Strategy
37
+ --------------------------
38
+ The function checks for semantic equivalents of the pattern:
39
+ - `.mcp-vector-search/` (exact match)
40
+ - `.mcp-vector-search` (without trailing slash)
41
+ - `.mcp-vector-search/*` (with wildcard)
42
+ - `/.mcp-vector-search/` (root-relative)
43
+
44
+ All are treated as equivalent to avoid duplicate entries.
45
+
46
+ Edge Cases Handled
47
+ ------------------
48
+ 1. .gitignore does not exist -> Create (if in git repo)
49
+ 2. .gitignore is empty -> Add pattern
50
+ 3. Pattern already exists -> Skip (log debug)
51
+ 4. Similar pattern exists -> Skip (log debug)
52
+ 5. Negation pattern exists -> Warn and skip (respects user intent)
53
+ 6. Not a git repository -> Skip (no .gitignore needed)
54
+ 7. Permission denied -> Warn and skip (log manual instructions)
55
+ 8. Encoding errors -> Try fallback encoding
56
+ 9. Missing parent directory -> Should not occur (project_root exists)
57
+ 10. Concurrent modification -> Safe (append operation is atomic-ish)
58
+
59
+ Args:
60
+ project_root: Project root directory (must exist)
61
+ pattern: Pattern to add to .gitignore (default: .mcp-vector-search/)
62
+ comment: Optional comment to add before the pattern
63
+ create_if_missing: Create .gitignore if it doesn't exist (default: True)
64
+
65
+ Returns:
66
+ True if pattern was added or already exists, False on error
67
+
68
+ Performance:
69
+ - Time Complexity: O(n) where n = lines in .gitignore (typically <1000)
70
+ - Space Complexity: O(n) for reading file into memory
71
+ - Expected Runtime: <10ms for typical .gitignore files
72
+
73
+ Notes:
74
+ - Only creates .gitignore in git repositories (checks for .git directory)
75
+ - Preserves existing file structure and encoding (UTF-8)
76
+ - Handles negation patterns gracefully (warns but doesn't override)
77
+ - Non-blocking: logs warnings instead of raising exceptions
78
+
79
+ Examples:
80
+ >>> # Basic usage during project initialization
81
+ >>> ensure_gitignore_entry(Path("/path/to/project"))
82
+ True
83
+
84
+ >>> # Custom pattern with custom comment
85
+ >>> ensure_gitignore_entry(
86
+ ... Path("/path/to/project"),
87
+ ... pattern=".custom-dir/",
88
+ ... comment="Custom tool directory"
89
+ ... )
90
+ True
91
+
92
+ >>> # Don't create .gitignore if missing
93
+ >>> ensure_gitignore_entry(
94
+ ... Path("/path/to/project"),
95
+ ... create_if_missing=False
96
+ ... )
97
+ False
98
+ """
99
+ gitignore_path = project_root / ".gitignore"
100
+
101
+ # Edge Case 1: Check if this is a git repository
102
+ # Only create/modify .gitignore in git repositories to avoid polluting non-git projects
103
+ git_dir = project_root / ".git"
104
+ if not git_dir.exists():
105
+ logger.debug(
106
+ "Not a git repository (no .git directory), skipping .gitignore update"
107
+ )
108
+ return False
109
+
110
+ try:
111
+ # Edge Case 2: Handle non-existent .gitignore
112
+ if not gitignore_path.exists():
113
+ if not create_if_missing:
114
+ logger.debug(".gitignore does not exist and create_if_missing=False")
115
+ return False
116
+
117
+ # Create new .gitignore with the pattern
118
+ content = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"
119
+ gitignore_path.write_text(content, encoding="utf-8")
120
+ logger.info(f"Created .gitignore with {pattern} entry")
121
+ return True
122
+
123
+ # Read existing content with UTF-8 encoding
124
+ try:
125
+ content = gitignore_path.read_text(encoding="utf-8")
126
+ except UnicodeDecodeError:
127
+ # Edge Case 8: Fallback to more lenient encoding
128
+ logger.debug("UTF-8 decode failed, trying with error replacement")
129
+ try:
130
+ content = gitignore_path.read_text(encoding="utf-8", errors="replace")
131
+ except Exception as e:
132
+ logger.warning(
133
+ f"Failed to read .gitignore due to encoding error: {e}. "
134
+ f"Please manually add '{pattern}' to your .gitignore"
135
+ )
136
+ return False
137
+
138
+ # Edge Case 3: Handle empty .gitignore
139
+ stripped_content = content.strip()
140
+ if not stripped_content:
141
+ content = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"
142
+ gitignore_path.write_text(content, encoding="utf-8")
143
+ logger.info(f"Added {pattern} to empty .gitignore")
144
+ return True
145
+
146
+ # Check for existing patterns (Edge Cases 4, 5, 6)
147
+ lines = content.split("\n")
148
+ normalized_pattern = pattern.rstrip("/").lstrip("/")
149
+
150
+ for line in lines:
151
+ # Skip comments and empty lines
152
+ stripped_line = line.strip()
153
+ if not stripped_line or stripped_line.startswith("#"):
154
+ continue
155
+
156
+ # Edge Case 6: Check for negation pattern (conflict)
157
+ # Negation patterns indicate explicit user intent to track the directory
158
+ if stripped_line.startswith("!") and normalized_pattern in stripped_line:
159
+ logger.warning(
160
+ f".gitignore contains negation pattern: {stripped_line}. "
161
+ "This indicates you want to track .mcp-vector-search/ in git. "
162
+ "Skipping automatic entry to respect your configuration."
163
+ )
164
+ return False
165
+
166
+ # Normalize line for comparison
167
+ normalized_line = stripped_line.rstrip("/").lstrip("/")
168
+
169
+ # Edge Cases 4 & 5: Check for exact or similar matches
170
+ # These patterns are semantically equivalent for .gitignore:
171
+ # - .mcp-vector-search/
172
+ # - .mcp-vector-search
173
+ # - .mcp-vector-search/*
174
+ # - /.mcp-vector-search/
175
+ if (
176
+ normalized_line == normalized_pattern
177
+ or normalized_line == normalized_pattern + "/*"
178
+ ):
179
+ logger.debug(f"Pattern already exists in .gitignore: {stripped_line}")
180
+ return True
181
+
182
+ # Pattern doesn't exist, add it
183
+ # Preserve file structure: ensure proper newline handling
184
+ if not content.endswith("\n"):
185
+ content += "\n"
186
+
187
+ # Add blank line before comment for visual separation
188
+ content += "\n"
189
+
190
+ if comment:
191
+ content += f"# {comment}\n"
192
+ content += f"{pattern}\n"
193
+
194
+ # Write back to file
195
+ gitignore_path.write_text(content, encoding="utf-8")
196
+ logger.info(f"Added {pattern} to .gitignore")
197
+ return True
198
+
199
+ except PermissionError:
200
+ # Edge Case 7: Handle read-only .gitignore or protected directory
201
+ logger.warning(
202
+ f"Cannot update .gitignore: Permission denied. "
203
+ f"Please manually add '{pattern}' to your .gitignore file at {gitignore_path}"
204
+ )
205
+ return False
206
+ except Exception as e:
207
+ # Catch-all for unexpected errors (don't block initialization)
208
+ logger.warning(
209
+ f"Failed to update .gitignore: {e}. "
210
+ f"Please manually add '{pattern}' to your .gitignore"
211
+ )
212
+ return False