mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.11__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Potentially problematic release: this version of mcp-vector-search has been flagged as possibly problematic.

Files changed (49)
  1. mcp_vector_search/__init__.py +3 -2
  2. mcp_vector_search/cli/commands/auto_index.py +397 -0
  3. mcp_vector_search/cli/commands/config.py +88 -40
  4. mcp_vector_search/cli/commands/index.py +198 -52
  5. mcp_vector_search/cli/commands/init.py +472 -58
  6. mcp_vector_search/cli/commands/install.py +284 -0
  7. mcp_vector_search/cli/commands/mcp.py +495 -0
  8. mcp_vector_search/cli/commands/search.py +241 -87
  9. mcp_vector_search/cli/commands/status.py +184 -58
  10. mcp_vector_search/cli/commands/watch.py +34 -35
  11. mcp_vector_search/cli/didyoumean.py +184 -0
  12. mcp_vector_search/cli/export.py +320 -0
  13. mcp_vector_search/cli/history.py +292 -0
  14. mcp_vector_search/cli/interactive.py +342 -0
  15. mcp_vector_search/cli/main.py +163 -26
  16. mcp_vector_search/cli/output.py +63 -45
  17. mcp_vector_search/config/defaults.py +50 -36
  18. mcp_vector_search/config/settings.py +49 -35
  19. mcp_vector_search/core/auto_indexer.py +298 -0
  20. mcp_vector_search/core/connection_pool.py +322 -0
  21. mcp_vector_search/core/database.py +335 -25
  22. mcp_vector_search/core/embeddings.py +73 -29
  23. mcp_vector_search/core/exceptions.py +19 -2
  24. mcp_vector_search/core/factory.py +310 -0
  25. mcp_vector_search/core/git_hooks.py +345 -0
  26. mcp_vector_search/core/indexer.py +237 -73
  27. mcp_vector_search/core/models.py +21 -19
  28. mcp_vector_search/core/project.py +73 -58
  29. mcp_vector_search/core/scheduler.py +330 -0
  30. mcp_vector_search/core/search.py +574 -86
  31. mcp_vector_search/core/watcher.py +48 -46
  32. mcp_vector_search/mcp/__init__.py +4 -0
  33. mcp_vector_search/mcp/__main__.py +25 -0
  34. mcp_vector_search/mcp/server.py +701 -0
  35. mcp_vector_search/parsers/base.py +30 -31
  36. mcp_vector_search/parsers/javascript.py +74 -48
  37. mcp_vector_search/parsers/python.py +57 -49
  38. mcp_vector_search/parsers/registry.py +47 -32
  39. mcp_vector_search/parsers/text.py +179 -0
  40. mcp_vector_search/utils/__init__.py +40 -0
  41. mcp_vector_search/utils/gitignore.py +229 -0
  42. mcp_vector_search/utils/timing.py +334 -0
  43. mcp_vector_search/utils/version.py +47 -0
  44. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
  45. mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
  46. mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
  47. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
  48. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
  49. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0

mcp_vector_search/parsers/text.py
@@ -0,0 +1,179 @@
+"""Text file parser for MCP Vector Search."""
+
+from pathlib import Path
+
+from ..core.models import CodeChunk
+from .base import BaseParser
+
+
+class TextParser(BaseParser):
+    """Parser for plain text files (.txt)."""
+
+    def __init__(self) -> None:
+        """Initialize text parser."""
+        super().__init__("text")
+
+    async def parse_file(self, file_path: Path) -> list[CodeChunk]:
+        """Parse a text file and extract chunks.
+
+        Args:
+            file_path: Path to the text file
+
+        Returns:
+            List of text chunks
+        """
+        try:
+            with open(file_path, encoding="utf-8") as f:
+                content = f.read()
+            return await self.parse_content(content, file_path)
+        except Exception:
+            # Return empty list if file can't be read
+            return []
+
+    async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
+        """Parse text content into semantic chunks.
+
+        Uses paragraph-based chunking for better semantic coherence.
+        Falls back to line-based chunking for non-paragraph text.
+
+        Args:
+            content: Text content to parse
+            file_path: Path to the source file
+
+        Returns:
+            List of text chunks
+        """
+        if not content.strip():
+            return []
+
+        chunks = []
+        lines = content.splitlines(keepends=True)
+
+        # Try paragraph-based chunking first
+        paragraphs = self._extract_paragraphs(content)
+
+        if paragraphs:
+            # Use paragraph-based chunking
+            for para_info in paragraphs:
+                chunk = self._create_chunk(
+                    content=para_info["content"],
+                    file_path=file_path,
+                    start_line=para_info["start_line"],
+                    end_line=para_info["end_line"],
+                    chunk_type="text",
+                )
+                chunks.append(chunk)
+        else:
+            # Fall back to line-based chunking for non-paragraph text
+            # Use smaller chunks for text files (30 lines instead of 50)
+            chunk_size = 30
+            for i in range(0, len(lines), chunk_size):
+                start_line = i + 1
+                end_line = min(i + chunk_size, len(lines))
+
+                chunk_content = "".join(lines[i:end_line])
+
+                if chunk_content.strip():
+                    chunk = self._create_chunk(
+                        content=chunk_content,
+                        file_path=file_path,
+                        start_line=start_line,
+                        end_line=end_line,
+                        chunk_type="text",
+                    )
+                    chunks.append(chunk)
+
+        return chunks
+
+    def _extract_paragraphs(self, content: str) -> list[dict]:
+        """Extract paragraphs from text content.
+
+        A paragraph is defined as one or more non-empty lines
+        separated by empty lines.
+
+        Args:
+            content: Text content
+
+        Returns:
+            List of paragraph info dictionaries
+        """
+        lines = content.splitlines(keepends=True)
+        paragraphs = []
+        current_para = []
+        start_line = 1
+
+        for i, line in enumerate(lines, 1):
+            if line.strip():
+                if not current_para:
+                    start_line = i
+                current_para.append(line)
+            else:
+                if current_para:
+                    # End of paragraph
+                    para_content = "".join(current_para)
+                    if len(para_content.strip()) > 20:  # Minimum paragraph size
+                        paragraphs.append({
+                            "content": para_content,
+                            "start_line": start_line,
+                            "end_line": i - 1
+                        })
+                    current_para = []
+
+        # Handle last paragraph if exists
+        if current_para:
+            para_content = "".join(current_para)
+            if len(para_content.strip()) > 20:
+                paragraphs.append({
+                    "content": para_content,
+                    "start_line": start_line,
+                    "end_line": len(lines)
+                })
+
+        # If we have very few paragraphs, merge small ones
+        if paragraphs:
+            merged = self._merge_small_paragraphs(paragraphs)
+            return merged
+
+        return []
+
+    def _merge_small_paragraphs(self, paragraphs: list[dict], target_size: int = 200) -> list[dict]:
+        """Merge small paragraphs to create more substantial chunks.
+
+        Args:
+            paragraphs: List of paragraph dictionaries
+            target_size: Target size for merged paragraphs in characters
+
+        Returns:
+            List of merged paragraph dictionaries
+        """
+        merged = []
+        current_merge = None
+
+        for para in paragraphs:
+            para_len = len(para["content"])
+
+            if current_merge is None:
+                current_merge = para.copy()
+            elif len(current_merge["content"]) + para_len < target_size * 2:
+                # Merge with current
+                current_merge["content"] += "\n" + para["content"]
+                current_merge["end_line"] = para["end_line"]
+            else:
+                # Start new merge
+                if len(current_merge["content"].strip()) > 20:
+                    merged.append(current_merge)
+                current_merge = para.copy()
+
+        # Add last merge
+        if current_merge and len(current_merge["content"].strip()) > 20:
+            merged.append(current_merge)
+
+        return merged
+
+    def get_supported_extensions(self) -> list[str]:
+        """Get list of supported file extensions.
+
+        Returns:
+            List of supported extensions
+        """
+        return [".txt"]

mcp_vector_search/utils/__init__.py
@@ -0,0 +1,40 @@
+"""Utility modules for MCP Vector Search."""
+
+from .gitignore import (
+    GitignoreParser,
+    GitignorePattern,
+    create_gitignore_parser,
+    is_path_gitignored,
+)
+from .timing import (
+    PerformanceProfiler,
+    SearchProfiler,
+    TimingResult,
+    get_global_profiler,
+    print_global_report,
+    time_async_block,
+    time_block,
+    time_function,
+)
+from .version import get_user_agent, get_version_info, get_version_string
+
+__all__ = [
+    # Gitignore utilities
+    "GitignoreParser",
+    "GitignorePattern",
+    "create_gitignore_parser",
+    "is_path_gitignored",
+    # Timing utilities
+    "PerformanceProfiler",
+    "TimingResult",
+    "time_function",
+    "time_block",
+    "time_async_block",
+    "get_global_profiler",
+    "print_global_report",
+    "SearchProfiler",
+    # Version utilities
+    "get_version_info",
+    "get_version_string",
+    "get_user_agent",
+]
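
These are plain re-exports, so downstream code can import the helpers from the package namespace instead of the individual modules. A short sketch of that convenience import path, using only names listed in __all__ together with the is_path_gitignored signature defined in gitignore.py below; the checked path is illustrative:

from pathlib import Path

from mcp_vector_search.utils import is_path_gitignored

# Convenience wrapper: builds a GitignoreParser for the project root
# and checks a single path against its accumulated patterns.
project_root = Path(".")
print(is_path_gitignored(project_root / "dist" / "example.whl", project_root))  # illustrative path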

mcp_vector_search/utils/gitignore.py
@@ -0,0 +1,229 @@
+"""Gitignore parsing and matching utilities."""
+
+import fnmatch
+import re
+from pathlib import Path
+from typing import List, Set
+
+from loguru import logger
+
+
+class GitignorePattern:
+    """Represents a single gitignore pattern with its matching logic."""
+
+    def __init__(self, pattern: str, is_negation: bool = False, is_directory_only: bool = False):
+        """Initialize a gitignore pattern.
+
+        Args:
+            pattern: The pattern string
+            is_negation: Whether this is a negation pattern (starts with !)
+            is_directory_only: Whether this pattern only matches directories (ends with /)
+        """
+        self.original_pattern = pattern
+        self.is_negation = is_negation
+        self.is_directory_only = is_directory_only
+        self.pattern = self._normalize_pattern(pattern)
+
+    def _normalize_pattern(self, pattern: str) -> str:
+        """Normalize the pattern for matching."""
+        # Remove leading ! for negation patterns
+        if pattern.startswith('!'):
+            pattern = pattern[1:]
+
+        # Remove trailing / for directory-only patterns
+        if pattern.endswith('/'):
+            pattern = pattern[:-1]
+
+        # Handle leading slash (absolute from repo root)
+        if pattern.startswith('/'):
+            pattern = pattern[1:]
+
+        return pattern
+
+    def matches(self, path: str, is_directory: bool = False) -> bool:
+        """Check if this pattern matches the given path.
+
+        Args:
+            path: Relative path from repository root
+            is_directory: Whether the path is a directory
+
+        Returns:
+            True if the pattern matches
+        """
+        # Directory-only patterns only match directories
+        if self.is_directory_only and not is_directory:
+            return False
+
+        # Convert path separators for consistent matching
+        path = path.replace('\\', '/')
+        pattern = self.pattern.replace('\\', '/')
+
+        # Try exact match first
+        if fnmatch.fnmatch(path, pattern):
+            return True
+
+        # Try matching any parent directory
+        path_parts = path.split('/')
+        for i in range(len(path_parts)):
+            subpath = '/'.join(path_parts[i:])
+            if fnmatch.fnmatch(subpath, pattern):
+                return True
+
+        # Try matching with ** patterns (glob-style)
+        if '**' in pattern:
+            # Convert ** to regex pattern
+            regex_pattern = pattern.replace('**', '.*')
+            regex_pattern = regex_pattern.replace('*', '[^/]*')
+            regex_pattern = regex_pattern.replace('?', '[^/]')
+            regex_pattern = f'^{regex_pattern}$'
+
+            try:
+                if re.match(regex_pattern, path):
+                    return True
+            except re.error:
+                # Fallback to simple fnmatch if regex fails
+                pass
+
+        return False
+
+
+class GitignoreParser:
+    """Parser for .gitignore files with proper pattern matching."""
+
+    def __init__(self, project_root: Path):
+        """Initialize gitignore parser.
+
+        Args:
+            project_root: Root directory of the project
+        """
+        self.project_root = project_root
+        self.patterns: List[GitignorePattern] = []
+        self._load_gitignore_files()
+
+    def _load_gitignore_files(self) -> None:
+        """Load all .gitignore files in the project hierarchy."""
+        # Load global .gitignore first (if exists)
+        global_gitignore = self.project_root / '.gitignore'
+        if global_gitignore.exists():
+            self._parse_gitignore_file(global_gitignore)
+
+        # Load .gitignore files in subdirectories
+        for gitignore_file in self.project_root.rglob('.gitignore'):
+            if gitignore_file != global_gitignore:
+                self._parse_gitignore_file(gitignore_file)
+
+    def _parse_gitignore_file(self, gitignore_path: Path) -> None:
+        """Parse a single .gitignore file.
+
+        Args:
+            gitignore_path: Path to the .gitignore file
+        """
+        try:
+            with open(gitignore_path, 'r', encoding='utf-8', errors='ignore') as f:
+                lines = f.readlines()
+
+            for line_num, line in enumerate(lines, 1):
+                line = line.strip()
+
+                # Skip empty lines and comments
+                if not line or line.startswith('#'):
+                    continue
+
+                # Check for negation pattern
+                is_negation = line.startswith('!')
+
+                # Check for directory-only pattern
+                is_directory_only = line.endswith('/')
+
+                # Create pattern relative to the .gitignore file's directory
+                gitignore_dir = gitignore_path.parent
+                if gitignore_dir != self.project_root:
+                    # Adjust pattern for subdirectory .gitignore files
+                    relative_dir = gitignore_dir.relative_to(self.project_root)
+                    if not line.startswith('/') and not is_negation:
+                        line = str(relative_dir / line)
+                    elif is_negation and not line[1:].startswith('/'):
+                        line = '!' + str(relative_dir / line[1:])
+
+                pattern = GitignorePattern(line, is_negation, is_directory_only)
+                self.patterns.append(pattern)
+
+        except Exception as e:
+            logger.warning(f"Failed to parse {gitignore_path}: {e}")
+
+    def is_ignored(self, path: Path) -> bool:
+        """Check if a path should be ignored according to .gitignore rules.
+
+        Args:
+            path: Path to check (can be absolute or relative to project root)
+
+        Returns:
+            True if the path should be ignored
+        """
+        try:
+            # Convert to relative path from project root
+            if path.is_absolute():
+                relative_path = path.relative_to(self.project_root)
+            else:
+                relative_path = path
+
+            path_str = str(relative_path).replace('\\', '/')
+            is_directory = path.is_dir() if path.exists() else False
+
+            # Apply patterns in order, with later patterns overriding earlier ones
+            ignored = False
+
+            for pattern in self.patterns:
+                if pattern.matches(path_str, is_directory):
+                    ignored = not pattern.is_negation
+
+            return ignored
+
+        except ValueError:
+            # Path is not relative to project root
+            return False
+        except Exception as e:
+            logger.debug(f"Error checking gitignore for {path}: {e}")
+            return False
+
+    def get_ignored_patterns(self) -> List[str]:
+        """Get list of all ignore patterns.
+
+        Returns:
+            List of pattern strings
+        """
+        return [p.original_pattern for p in self.patterns if not p.is_negation]
+
+    def get_negation_patterns(self) -> List[str]:
+        """Get list of all negation patterns.
+
+        Returns:
+            List of negation pattern strings
+        """
+        return [p.original_pattern for p in self.patterns if p.is_negation]
+
+
+def create_gitignore_parser(project_root: Path) -> GitignoreParser:
+    """Create a gitignore parser for the given project.
+
+    Args:
+        project_root: Root directory of the project
+
+    Returns:
+        GitignoreParser instance
+    """
+    return GitignoreParser(project_root)
+
+
+def is_path_gitignored(path: Path, project_root: Path) -> bool:
+    """Quick function to check if a path is gitignored.
+
+    Args:
+        path: Path to check
+        project_root: Root directory of the project
+
+    Returns:
+        True if the path should be ignored
+    """
+    parser = create_gitignore_parser(project_root)
+    return parser.is_ignored(path)
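
Taken together, GitignoreParser collects every .gitignore under the project root, wraps each non-comment line in a GitignorePattern, and evaluates the patterns in order so that later patterns (including negations) override earlier ones. A minimal sketch of that flow against the API above; the project root and probed paths are illustrative:

from pathlib import Path

from mcp_vector_search.utils.gitignore import GitignoreParser, GitignorePattern

# Pattern-level matching. When constructing GitignorePattern directly, the
# caller passes the negation/directory flags; the parser derives them from
# the leading "!" and trailing "/" when reading .gitignore files.
pattern = GitignorePattern("build/", is_negation=False, is_directory_only=True)
print(pattern.matches("build", is_directory=True))   # True
print(pattern.matches("build", is_directory=False))  # False: directory-only pattern

# Project-level matching: every .gitignore under the root is loaded on construction.
parser = GitignoreParser(Path("."))                  # illustrative project root
print(parser.is_ignored(Path("dist")))               # depends on the local .gitignore files
print(parser.get_negation_patterns())                # patterns that began with "!"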