mcp-vector-search 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/config.py +393 -0
  6. mcp_vector_search/cli/commands/demo.py +358 -0
  7. mcp_vector_search/cli/commands/index.py +744 -0
  8. mcp_vector_search/cli/commands/init.py +645 -0
  9. mcp_vector_search/cli/commands/install.py +675 -0
  10. mcp_vector_search/cli/commands/install_old.py +696 -0
  11. mcp_vector_search/cli/commands/mcp.py +1182 -0
  12. mcp_vector_search/cli/commands/reset.py +393 -0
  13. mcp_vector_search/cli/commands/search.py +773 -0
  14. mcp_vector_search/cli/commands/status.py +549 -0
  15. mcp_vector_search/cli/commands/uninstall.py +485 -0
  16. mcp_vector_search/cli/commands/visualize.py +1467 -0
  17. mcp_vector_search/cli/commands/watch.py +287 -0
  18. mcp_vector_search/cli/didyoumean.py +500 -0
  19. mcp_vector_search/cli/export.py +320 -0
  20. mcp_vector_search/cli/history.py +295 -0
  21. mcp_vector_search/cli/interactive.py +342 -0
  22. mcp_vector_search/cli/main.py +461 -0
  23. mcp_vector_search/cli/output.py +412 -0
  24. mcp_vector_search/cli/suggestions.py +375 -0
  25. mcp_vector_search/config/__init__.py +1 -0
  26. mcp_vector_search/config/constants.py +24 -0
  27. mcp_vector_search/config/defaults.py +200 -0
  28. mcp_vector_search/config/settings.py +134 -0
  29. mcp_vector_search/core/__init__.py +1 -0
  30. mcp_vector_search/core/auto_indexer.py +298 -0
  31. mcp_vector_search/core/connection_pool.py +360 -0
  32. mcp_vector_search/core/database.py +1214 -0
  33. mcp_vector_search/core/directory_index.py +318 -0
  34. mcp_vector_search/core/embeddings.py +294 -0
  35. mcp_vector_search/core/exceptions.py +89 -0
  36. mcp_vector_search/core/factory.py +318 -0
  37. mcp_vector_search/core/git_hooks.py +345 -0
  38. mcp_vector_search/core/indexer.py +1002 -0
  39. mcp_vector_search/core/models.py +294 -0
  40. mcp_vector_search/core/project.py +333 -0
  41. mcp_vector_search/core/scheduler.py +330 -0
  42. mcp_vector_search/core/search.py +952 -0
  43. mcp_vector_search/core/watcher.py +322 -0
  44. mcp_vector_search/mcp/__init__.py +5 -0
  45. mcp_vector_search/mcp/__main__.py +25 -0
  46. mcp_vector_search/mcp/server.py +733 -0
  47. mcp_vector_search/parsers/__init__.py +8 -0
  48. mcp_vector_search/parsers/base.py +296 -0
  49. mcp_vector_search/parsers/dart.py +605 -0
  50. mcp_vector_search/parsers/html.py +413 -0
  51. mcp_vector_search/parsers/javascript.py +643 -0
  52. mcp_vector_search/parsers/php.py +694 -0
  53. mcp_vector_search/parsers/python.py +502 -0
  54. mcp_vector_search/parsers/registry.py +223 -0
  55. mcp_vector_search/parsers/ruby.py +678 -0
  56. mcp_vector_search/parsers/text.py +186 -0
  57. mcp_vector_search/parsers/utils.py +265 -0
  58. mcp_vector_search/py.typed +1 -0
  59. mcp_vector_search/utils/__init__.py +40 -0
  60. mcp_vector_search/utils/gitignore.py +250 -0
  61. mcp_vector_search/utils/monorepo.py +277 -0
  62. mcp_vector_search/utils/timing.py +334 -0
  63. mcp_vector_search/utils/version.py +47 -0
  64. mcp_vector_search-0.12.6.dist-info/METADATA +754 -0
  65. mcp_vector_search-0.12.6.dist-info/RECORD +68 -0
  66. mcp_vector_search-0.12.6.dist-info/WHEEL +4 -0
  67. mcp_vector_search-0.12.6.dist-info/entry_points.txt +2 -0
  68. mcp_vector_search-0.12.6.dist-info/licenses/LICENSE +21 -0
mcp_vector_search/utils/gitignore.py
@@ -0,0 +1,250 @@
+"""Gitignore parsing and matching utilities."""
+
+import fnmatch
+import re
+from pathlib import Path
+
+from loguru import logger
+
+
+class GitignorePattern:
+    """Represents a single gitignore pattern with its matching logic."""
+
+    def __init__(
+        self, pattern: str, is_negation: bool = False, is_directory_only: bool = False
+    ):
+        """Initialize a gitignore pattern.
+
+        Args:
+            pattern: The pattern string
+            is_negation: Whether this is a negation pattern (starts with !)
+            is_directory_only: Whether this pattern only matches directories (ends with /)
+        """
+        self.original_pattern = pattern
+        self.is_negation = is_negation
+        self.is_directory_only = is_directory_only
+        self.pattern = self._normalize_pattern(pattern)
+
+    def _normalize_pattern(self, pattern: str) -> str:
+        """Normalize the pattern for matching."""
+        # Remove leading ! for negation patterns
+        if pattern.startswith("!"):
+            pattern = pattern[1:]
+
+        # Remove trailing / for directory-only patterns
+        if pattern.endswith("/"):
+            pattern = pattern[:-1]
+
+        # Handle leading slash (absolute from repo root)
+        if pattern.startswith("/"):
+            pattern = pattern[1:]
+
+        return pattern
+
+    def matches(self, path: str, is_directory: bool = False) -> bool:
+        """Check if this pattern matches the given path.
+
+        Args:
+            path: Relative path from repository root
+            is_directory: Whether the path is a directory
+
+        Returns:
+            True if the pattern matches
+        """
+        # Convert path separators for consistent matching
+        path = path.replace("\\", "/")
+        pattern = self.pattern.replace("\\", "/")
+
+        # For directory-only patterns, check if any parent directory matches
+        # This implements Git's behavior where "dir/" excludes both the directory
+        # AND all files within it recursively
+        if self.is_directory_only:
+            path_parts = path.split("/")
+            # Check each parent directory component
+            for i in range(1, len(path_parts) + 1):
+                parent = "/".join(path_parts[:i])
+                if fnmatch.fnmatch(parent, pattern):
+                    return True
+            # If no parent matches and this is not a directory, don't exclude
+            if not is_directory:
+                return False
+
+        # Try exact match first
+        if fnmatch.fnmatch(path, pattern):
+            return True
+
+        # Try matching any parent directory
+        path_parts = path.split("/")
+        for i in range(len(path_parts)):
+            subpath = "/".join(path_parts[i:])
+            if fnmatch.fnmatch(subpath, pattern):
+                return True
+
+        # Try matching with ** patterns (glob-style)
+        if "**" in pattern:
+            # Convert ** to regex pattern
+            regex_pattern = pattern.replace("**", ".*")
+            regex_pattern = regex_pattern.replace("*", "[^/]*")
+            regex_pattern = regex_pattern.replace("?", "[^/]")
+            regex_pattern = f"^{regex_pattern}$"
+
+            try:
+                if re.match(regex_pattern, path):
+                    return True
+            except re.error:
+                # Fallback to simple fnmatch if regex fails
+                pass
+
+        return False
+
+
+class GitignoreParser:
+    """Parser for .gitignore files with proper pattern matching."""
+
+    def __init__(self, project_root: Path):
+        """Initialize gitignore parser.
+
+        Args:
+            project_root: Root directory of the project
+        """
+        self.project_root = project_root
+        self.patterns: list[GitignorePattern] = []
+        self._load_gitignore_files()
+
+    def _load_gitignore_files(self) -> None:
+        """Load .gitignore file from project root only.
+
+        Note: Only the root .gitignore is loaded to avoid performance issues
+        with rglob traversing large directory trees (e.g., node_modules with
+        250K+ files). Subdirectory .gitignore files are intentionally skipped
+        as they would add significant overhead without much benefit for
+        semantic code search indexing.
+        """
+        # Load root .gitignore only
+        root_gitignore = self.project_root / ".gitignore"
+        if root_gitignore.exists():
+            self._parse_gitignore_file(root_gitignore)
+
+    def _parse_gitignore_file(self, gitignore_path: Path) -> None:
+        """Parse a single .gitignore file.
+
+        Args:
+            gitignore_path: Path to the .gitignore file
+        """
+        try:
+            with open(gitignore_path, encoding="utf-8", errors="ignore") as f:
+                lines = f.readlines()
+
+            for _line_num, line in enumerate(lines, 1):
+                line = line.strip()
+
+                # Skip empty lines and comments
+                if not line or line.startswith("#"):
+                    continue
+
+                # Check for negation pattern
+                is_negation = line.startswith("!")
+
+                # Check for directory-only pattern
+                is_directory_only = line.endswith("/")
+
+                # Create pattern (all patterns are from root .gitignore)
+                pattern = GitignorePattern(line, is_negation, is_directory_only)
+                self.patterns.append(pattern)
+
+        except Exception as e:
+            logger.warning(f"Failed to parse {gitignore_path}: {e}")
+
+    def is_ignored(self, path: Path, is_directory: bool | None = None) -> bool:
+        """Check if a path should be ignored according to .gitignore rules.
+
+        Args:
+            path: Path to check (can be absolute or relative to project root)
+            is_directory: Optional hint if path is a directory.
+                If None, will check filesystem (slower).
+                If provided, skips filesystem check (faster).
+
+        Returns:
+            True if the path should be ignored
+        """
+        try:
+            # SHORT-CIRCUIT: If no patterns, nothing is ignored
+            # This prevents 200k+ unnecessary filesystem stat() calls on projects
+            # without .gitignore files
+            if not self.patterns:
+                return False
+
+            # Convert to relative path from project root
+            if path.is_absolute():
+                relative_path = path.relative_to(self.project_root)
+            else:
+                relative_path = path
+
+            path_str = str(relative_path).replace("\\", "/")
+
+            # Only check if directory when needed and not provided as hint
+            # PERFORMANCE: Passing is_directory hint from caller (e.g., os.walk)
+            # avoids hundreds of thousands of stat() calls on large repositories
+            if is_directory is None:
+                is_directory = path.is_dir() if path.exists() else False
+
+            # Apply patterns in order, with later patterns overriding earlier ones
+            ignored = False
+
+            for pattern in self.patterns:
+                if pattern.matches(path_str, is_directory):
+                    ignored = not pattern.is_negation
+
+            return ignored
+
+        except ValueError:
+            # Path is not relative to project root
+            return False
+        except Exception as e:
+            logger.debug(f"Error checking gitignore for {path}: {e}")
+            return False
+
+    def get_ignored_patterns(self) -> list[str]:
+        """Get list of all ignore patterns.
+
+        Returns:
+            List of pattern strings
+        """
+        return [p.original_pattern for p in self.patterns if not p.is_negation]
+
+    def get_negation_patterns(self) -> list[str]:
+        """Get list of all negation patterns.
+
+        Returns:
+            List of negation pattern strings
+        """
+        return [p.original_pattern for p in self.patterns if p.is_negation]
+
+
+def create_gitignore_parser(project_root: Path) -> GitignoreParser:
+    """Create a gitignore parser for the given project.
+
+    Args:
+        project_root: Root directory of the project
+
+    Returns:
+        GitignoreParser instance
+    """
+    return GitignoreParser(project_root)
+
+
+def is_path_gitignored(
+    path: Path, project_root: Path, is_directory: bool | None = None
+) -> bool:
+    """Quick function to check if a path is gitignored.
+
+    Args:
+        path: Path to check
+        project_root: Root directory of the project
+        is_directory: Optional hint if path is a directory (avoids filesystem check)
+
+    Returns:
+        True if the path should be ignored
+    """
+    parser = create_gitignore_parser(project_root)
+    return parser.is_ignored(path, is_directory=is_directory)
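
A minimal usage sketch (illustrative only, not part of the package diff): assuming the module is importable as mcp_vector_search.utils.gitignore per the file list above, and using a hypothetical project path. Passing the is_directory hint avoids the extra stat() calls the docstrings mention; the one-off helper re-parses .gitignore on every call, so the parser object is preferable for repeated checks.

    from pathlib import Path

    from mcp_vector_search.utils.gitignore import GitignoreParser, is_path_gitignored

    project_root = Path("/path/to/repo")  # hypothetical project root

    # Reuse one parser for many checks (parses the root .gitignore once)
    parser = GitignoreParser(project_root)
    print(parser.is_ignored(project_root / "node_modules", is_directory=True))
    print(parser.is_ignored(project_root / "src" / "main.py", is_directory=False))

    # One-off convenience helper (re-parses .gitignore on each call)
    print(is_path_gitignored(project_root / "dist" / "bundle.js", project_root, is_directory=False))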
mcp_vector_search/utils/monorepo.py
@@ -0,0 +1,277 @@
+"""Monorepo detection and subproject identification."""
+
+import json
+from pathlib import Path
+from typing import NamedTuple
+
+from loguru import logger
+
+
+class Subproject(NamedTuple):
+    """Represents a subproject in a monorepo."""
+
+    name: str  # "ewtn-plus-foundation"
+    path: Path  # Absolute path to subproject
+    relative_path: str  # Relative to monorepo root
+
+
+class MonorepoDetector:
+    """Detects monorepo structure and identifies subprojects."""
+
+    def __init__(self, project_root: Path):
+        """Initialize monorepo detector.
+
+        Args:
+            project_root: Root directory of the project
+        """
+        self.project_root = project_root
+        self._subprojects: list[Subproject] | None = None
+
+    def is_monorepo(self) -> bool:
+        """Check if project is a monorepo.
+
+        Returns:
+            True if monorepo structure detected
+        """
+        return bool(self.detect_subprojects())
+
+    def detect_subprojects(self) -> list[Subproject]:
+        """Detect all subprojects in the monorepo.
+
+        Returns:
+            List of detected subprojects
+        """
+        if self._subprojects is not None:
+            return self._subprojects
+
+        subprojects = []
+
+        # Try package.json workspaces (npm/yarn/pnpm)
+        subprojects.extend(self._detect_npm_workspaces())
+
+        # Try lerna.json
+        if not subprojects:
+            subprojects.extend(self._detect_lerna_packages())
+
+        # Try pnpm-workspace.yaml
+        if not subprojects:
+            subprojects.extend(self._detect_pnpm_workspaces())
+
+        # Try nx workspace
+        if not subprojects:
+            subprojects.extend(self._detect_nx_workspace())
+
+        # Fallback: Look for multiple package.json files
+        if not subprojects:
+            subprojects.extend(self._detect_by_package_json())
+
+        self._subprojects = subprojects
+        logger.debug(f"Detected {len(subprojects)} subprojects in {self.project_root}")
+
+        return subprojects
+
+    def _detect_npm_workspaces(self) -> list[Subproject]:
+        """Detect npm/yarn/pnpm workspaces from package.json.
+
+        Returns:
+            List of subprojects from workspaces
+        """
+        package_json = self.project_root / "package.json"
+        if not package_json.exists():
+            return []
+
+        try:
+            with open(package_json) as f:
+                data = json.load(f)
+
+            workspaces = data.get("workspaces", [])
+
+            # Handle both array and object format
+            if isinstance(workspaces, dict):
+                workspaces = workspaces.get("packages", [])
+
+            return self._expand_workspace_patterns(workspaces)
+
+        except Exception as e:
+            logger.debug(f"Failed to parse package.json workspaces: {e}")
+            return []
+
+    def _detect_lerna_packages(self) -> list[Subproject]:
+        """Detect lerna packages from lerna.json.
+
+        Returns:
+            List of subprojects from lerna
+        """
+        lerna_json = self.project_root / "lerna.json"
+        if not lerna_json.exists():
+            return []
+
+        try:
+            with open(lerna_json) as f:
+                data = json.load(f)
+
+            packages = data.get("packages", ["packages/*"])
+            return self._expand_workspace_patterns(packages)
+
+        except Exception as e:
+            logger.debug(f"Failed to parse lerna.json: {e}")
+            return []
+
+    def _detect_pnpm_workspaces(self) -> list[Subproject]:
+        """Detect pnpm workspaces from pnpm-workspace.yaml.
+
+        Returns:
+            List of subprojects from pnpm
+        """
+        pnpm_workspace = self.project_root / "pnpm-workspace.yaml"
+        if not pnpm_workspace.exists():
+            return []
+
+        try:
+            import yaml
+
+            with open(pnpm_workspace) as f:
+                data = yaml.safe_load(f)
+
+            packages = data.get("packages", [])
+            return self._expand_workspace_patterns(packages)
+
+        except ImportError:
+            logger.debug("pyyaml not installed, skipping pnpm-workspace.yaml detection")
+            return []
+        except Exception as e:
+            logger.debug(f"Failed to parse pnpm-workspace.yaml: {e}")
+            return []
+
+    def _detect_nx_workspace(self) -> list[Subproject]:
+        """Detect nx workspace projects.
+
+        Returns:
+            List of subprojects from nx workspace
+        """
+        nx_json = self.project_root / "nx.json"
+        workspace_json = self.project_root / "workspace.json"
+
+        if not (nx_json.exists() or workspace_json.exists()):
+            return []
+
+        # Nx projects are typically in apps/ and libs/
+        subprojects = []
+        for base_dir in ["apps", "libs", "packages"]:
+            base_path = self.project_root / base_dir
+            if base_path.exists():
+                for subdir in base_path.iterdir():
+                    if subdir.is_dir() and not subdir.name.startswith("."):
+                        package_json = subdir / "package.json"
+                        name = self._get_package_name(package_json) or subdir.name
+                        relative = str(subdir.relative_to(self.project_root))
+                        subprojects.append(Subproject(name, subdir, relative))
+
+        return subprojects
+
+    def _detect_by_package_json(self) -> list[Subproject]:
+        """Fallback: Find all directories with package.json.
+
+        Returns:
+            List of subprojects by package.json presence
+        """
+        subprojects = []
+
+        # Only search up to 3 levels deep
+        for package_json in self.project_root.rglob("package.json"):
+            # Skip node_modules
+            if "node_modules" in package_json.parts:
+                continue
+
+            # Skip root package.json
+            if package_json.parent == self.project_root:
+                continue
+
+            # Check depth
+            relative_parts = package_json.relative_to(self.project_root).parts
+            if len(relative_parts) > 4:  # Too deep
+                continue
+
+            subdir = package_json.parent
+            name = self._get_package_name(package_json) or subdir.name
+            relative = str(subdir.relative_to(self.project_root))
+            subprojects.append(Subproject(name, subdir, relative))
+
+        return subprojects
+
+    def _expand_workspace_patterns(self, patterns: list[str]) -> list[Subproject]:
+        """Expand workspace glob patterns to actual directories.
+
+        Args:
+            patterns: List of glob patterns (e.g., ["packages/*", "apps/*"])
+
+        Returns:
+            List of subprojects matching patterns
+        """
+        subprojects = []
+
+        for pattern in patterns:
+            # Remove negation patterns (e.g., "!packages/excluded")
+            if pattern.startswith("!"):
+                continue
+
+            # Expand glob pattern
+            for path in self.project_root.glob(pattern):
+                if not path.is_dir():
+                    continue
+
+                if path.name.startswith("."):
+                    continue
+
+                # Try to get name from package.json
+                package_json = path / "package.json"
+                name = self._get_package_name(package_json) or path.name
+                relative = str(path.relative_to(self.project_root))
+
+                subprojects.append(Subproject(name, path, relative))
+
+        return subprojects
+
+    def _get_package_name(self, package_json: Path) -> str | None:
+        """Get package name from package.json.
+
+        Args:
+            package_json: Path to package.json file
+
+        Returns:
+            Package name or None
+        """
+        if not package_json.exists():
+            return None
+
+        try:
+            with open(package_json) as f:
+                data = json.load(f)
+            return data.get("name")
+        except Exception:
+            return None
+
+    def get_subproject_for_file(self, file_path: Path) -> Subproject | None:
+        """Determine which subproject a file belongs to.
+
+        Args:
+            file_path: Path to file
+
+        Returns:
+            Subproject containing the file, or None
+        """
+        subprojects = self.detect_subprojects()
+
+        if not subprojects:
+            return None
+
+        # Find the most specific (deepest) subproject containing this file
+        matching_subprojects = [
+            sp for sp in subprojects if file_path.is_relative_to(sp.path)
+        ]
+
+        if not matching_subprojects:
+            return None
+
+        # Return the deepest match (longest path)
+        return max(matching_subprojects, key=lambda sp: len(sp.path.parts))
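
Another illustrative sketch (not part of the diff): exercising MonorepoDetector against a hypothetical npm/yarn workspace layout, with the import path taken from the file list above and all filesystem paths invented for the example.

    from pathlib import Path

    from mcp_vector_search.utils.monorepo import MonorepoDetector

    root = Path("/path/to/monorepo")  # hypothetical workspace root
    detector = MonorepoDetector(root)

    if detector.is_monorepo():
        # Enumerate the detected workspaces (cached after the first call)
        for sub in detector.detect_subprojects():
            print(f"{sub.name} -> {sub.relative_path}")

        # Map a single file back to the deepest subproject that contains it
        owner = detector.get_subproject_for_file(root / "packages" / "ui" / "src" / "button.tsx")
        if owner is not None:
            print(f"belongs to {owner.name}")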