mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +3 -2
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/config.py +88 -40
- mcp_vector_search/cli/commands/index.py +198 -52
- mcp_vector_search/cli/commands/init.py +471 -58
- mcp_vector_search/cli/commands/install.py +284 -0
- mcp_vector_search/cli/commands/mcp.py +495 -0
- mcp_vector_search/cli/commands/search.py +241 -87
- mcp_vector_search/cli/commands/status.py +184 -58
- mcp_vector_search/cli/commands/watch.py +34 -35
- mcp_vector_search/cli/didyoumean.py +184 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +292 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +175 -27
- mcp_vector_search/cli/output.py +63 -45
- mcp_vector_search/config/defaults.py +50 -36
- mcp_vector_search/config/settings.py +49 -35
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/connection_pool.py +322 -0
- mcp_vector_search/core/database.py +335 -25
- mcp_vector_search/core/embeddings.py +73 -29
- mcp_vector_search/core/exceptions.py +19 -2
- mcp_vector_search/core/factory.py +310 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +237 -73
- mcp_vector_search/core/models.py +21 -19
- mcp_vector_search/core/project.py +73 -58
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +574 -86
- mcp_vector_search/core/watcher.py +48 -46
- mcp_vector_search/mcp/__init__.py +4 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +701 -0
- mcp_vector_search/parsers/base.py +30 -31
- mcp_vector_search/parsers/javascript.py +74 -48
- mcp_vector_search/parsers/python.py +57 -49
- mcp_vector_search/parsers/registry.py +47 -32
- mcp_vector_search/parsers/text.py +179 -0
- mcp_vector_search/utils/__init__.py +40 -0
- mcp_vector_search/utils/gitignore.py +229 -0
- mcp_vector_search/utils/timing.py +334 -0
- mcp_vector_search/utils/version.py +47 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/METADATA +173 -7
- mcp_vector_search-0.4.12.dist-info/RECORD +54 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import List, Optional
|
|
6
5
|
|
|
7
6
|
from ..core.models import CodeChunk
|
|
8
7
|
|
|
@@ -12,32 +11,32 @@ class BaseParser(ABC):
|
|
|
12
11
|
|
|
13
12
|
def __init__(self, language: str) -> None:
|
|
14
13
|
"""Initialize parser for a specific language.
|
|
15
|
-
|
|
14
|
+
|
|
16
15
|
Args:
|
|
17
16
|
language: Programming language name
|
|
18
17
|
"""
|
|
19
18
|
self.language = language
|
|
20
19
|
|
|
21
20
|
@abstractmethod
|
|
22
|
-
async def parse_file(self, file_path: Path) ->
|
|
21
|
+
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
|
|
23
22
|
"""Parse a file and extract code chunks.
|
|
24
|
-
|
|
23
|
+
|
|
25
24
|
Args:
|
|
26
25
|
file_path: Path to the file to parse
|
|
27
|
-
|
|
26
|
+
|
|
28
27
|
Returns:
|
|
29
28
|
List of code chunks extracted from the file
|
|
30
29
|
"""
|
|
31
30
|
...
|
|
32
31
|
|
|
33
32
|
@abstractmethod
|
|
34
|
-
async def parse_content(self, content: str, file_path: Path) ->
|
|
33
|
+
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
|
|
35
34
|
"""Parse content and extract code chunks.
|
|
36
|
-
|
|
35
|
+
|
|
37
36
|
Args:
|
|
38
37
|
content: File content to parse
|
|
39
38
|
file_path: Path to the source file (for metadata)
|
|
40
|
-
|
|
39
|
+
|
|
41
40
|
Returns:
|
|
42
41
|
List of code chunks extracted from the content
|
|
43
42
|
"""
|
|
@@ -45,19 +44,19 @@ class BaseParser(ABC):
|
|
|
45
44
|
|
|
46
45
|
def supports_file(self, file_path: Path) -> bool:
|
|
47
46
|
"""Check if this parser supports the given file.
|
|
48
|
-
|
|
47
|
+
|
|
49
48
|
Args:
|
|
50
49
|
file_path: Path to check
|
|
51
|
-
|
|
50
|
+
|
|
52
51
|
Returns:
|
|
53
52
|
True if this parser can handle the file
|
|
54
53
|
"""
|
|
55
54
|
return file_path.suffix.lower() in self.get_supported_extensions()
|
|
56
55
|
|
|
57
56
|
@abstractmethod
|
|
58
|
-
def get_supported_extensions(self) ->
|
|
57
|
+
def get_supported_extensions(self) -> list[str]:
|
|
59
58
|
"""Get list of file extensions supported by this parser.
|
|
60
|
-
|
|
59
|
+
|
|
61
60
|
Returns:
|
|
62
61
|
List of file extensions (including the dot)
|
|
63
62
|
"""
|
|
@@ -70,12 +69,12 @@ class BaseParser(ABC):
|
|
|
70
69
|
start_line: int,
|
|
71
70
|
end_line: int,
|
|
72
71
|
chunk_type: str = "code",
|
|
73
|
-
function_name:
|
|
74
|
-
class_name:
|
|
75
|
-
docstring:
|
|
72
|
+
function_name: str | None = None,
|
|
73
|
+
class_name: str | None = None,
|
|
74
|
+
docstring: str | None = None,
|
|
76
75
|
) -> CodeChunk:
|
|
77
76
|
"""Create a code chunk with metadata.
|
|
78
|
-
|
|
77
|
+
|
|
79
78
|
Args:
|
|
80
79
|
content: Code content
|
|
81
80
|
file_path: Source file path
|
|
@@ -85,7 +84,7 @@ class BaseParser(ABC):
|
|
|
85
84
|
function_name: Function name if applicable
|
|
86
85
|
class_name: Class name if applicable
|
|
87
86
|
docstring: Docstring if applicable
|
|
88
|
-
|
|
87
|
+
|
|
89
88
|
Returns:
|
|
90
89
|
CodeChunk instance
|
|
91
90
|
"""
|
|
@@ -101,32 +100,32 @@ class BaseParser(ABC):
|
|
|
101
100
|
docstring=docstring,
|
|
102
101
|
)
|
|
103
102
|
|
|
104
|
-
def _split_into_lines(self, content: str) ->
|
|
103
|
+
def _split_into_lines(self, content: str) -> list[str]:
|
|
105
104
|
"""Split content into lines, preserving line endings.
|
|
106
|
-
|
|
105
|
+
|
|
107
106
|
Args:
|
|
108
107
|
content: Content to split
|
|
109
|
-
|
|
108
|
+
|
|
110
109
|
Returns:
|
|
111
110
|
List of lines
|
|
112
111
|
"""
|
|
113
112
|
return content.splitlines(keepends=True)
|
|
114
113
|
|
|
115
|
-
def _get_line_range(self, lines:
|
|
114
|
+
def _get_line_range(self, lines: list[str], start_line: int, end_line: int) -> str:
|
|
116
115
|
"""Extract a range of lines from content.
|
|
117
|
-
|
|
116
|
+
|
|
118
117
|
Args:
|
|
119
118
|
lines: List of lines
|
|
120
119
|
start_line: Starting line number (1-based)
|
|
121
120
|
end_line: Ending line number (1-based)
|
|
122
|
-
|
|
121
|
+
|
|
123
122
|
Returns:
|
|
124
123
|
Content for the specified line range
|
|
125
124
|
"""
|
|
126
125
|
# Convert to 0-based indexing
|
|
127
126
|
start_idx = max(0, start_line - 1)
|
|
128
127
|
end_idx = min(len(lines), end_line)
|
|
129
|
-
|
|
128
|
+
|
|
130
129
|
return "".join(lines[start_idx:end_idx])
|
|
131
130
|
|
|
132
131
|
|
|
@@ -137,32 +136,32 @@ class FallbackParser(BaseParser):
|
|
|
137
136
|
"""Initialize fallback parser."""
|
|
138
137
|
super().__init__(language)
|
|
139
138
|
|
|
140
|
-
async def parse_file(self, file_path: Path) ->
|
|
139
|
+
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
|
|
141
140
|
"""Parse file using simple text chunking."""
|
|
142
141
|
try:
|
|
143
|
-
with open(file_path,
|
|
142
|
+
with open(file_path, encoding="utf-8") as f:
|
|
144
143
|
content = f.read()
|
|
145
144
|
return await self.parse_content(content, file_path)
|
|
146
145
|
except Exception:
|
|
147
146
|
# Return empty list if file can't be read
|
|
148
147
|
return []
|
|
149
148
|
|
|
150
|
-
async def parse_content(self, content: str, file_path: Path) ->
|
|
149
|
+
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
|
|
151
150
|
"""Parse content using simple text chunking."""
|
|
152
151
|
if not content.strip():
|
|
153
152
|
return []
|
|
154
153
|
|
|
155
154
|
lines = self._split_into_lines(content)
|
|
156
155
|
chunks = []
|
|
157
|
-
|
|
156
|
+
|
|
158
157
|
# Simple chunking: split into chunks of ~50 lines
|
|
159
158
|
chunk_size = 50
|
|
160
159
|
for i in range(0, len(lines), chunk_size):
|
|
161
160
|
start_line = i + 1
|
|
162
161
|
end_line = min(i + chunk_size, len(lines))
|
|
163
|
-
|
|
162
|
+
|
|
164
163
|
chunk_content = self._get_line_range(lines, start_line, end_line)
|
|
165
|
-
|
|
164
|
+
|
|
166
165
|
if chunk_content.strip():
|
|
167
166
|
chunk = self._create_chunk(
|
|
168
167
|
content=chunk_content,
|
|
@@ -175,6 +174,6 @@ class FallbackParser(BaseParser):
|
|
|
175
174
|
|
|
176
175
|
return chunks
|
|
177
176
|
|
|
178
|
-
def get_supported_extensions(self) ->
|
|
177
|
+
def get_supported_extensions(self) -> list[str]:
|
|
179
178
|
"""Fallback parser supports all extensions."""
|
|
180
179
|
return ["*"] # Special marker for "all extensions"
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import List, Optional
|
|
6
5
|
|
|
7
6
|
from loguru import logger
|
|
8
7
|
|
|
@@ -17,24 +16,24 @@ class JavaScriptParser(BaseParser):
|
|
|
17
16
|
"""Initialize JavaScript parser."""
|
|
18
17
|
super().__init__(language)
|
|
19
18
|
|
|
20
|
-
async def parse_file(self, file_path: Path) ->
|
|
19
|
+
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
|
|
21
20
|
"""Parse a JavaScript/TypeScript file and extract code chunks."""
|
|
22
21
|
try:
|
|
23
|
-
with open(file_path,
|
|
22
|
+
with open(file_path, encoding="utf-8") as f:
|
|
24
23
|
content = f.read()
|
|
25
24
|
return await self.parse_content(content, file_path)
|
|
26
25
|
except Exception as e:
|
|
27
26
|
logger.error(f"Failed to read file {file_path}: {e}")
|
|
28
27
|
return []
|
|
29
28
|
|
|
30
|
-
async def parse_content(self, content: str, file_path: Path) ->
|
|
29
|
+
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
|
|
31
30
|
"""Parse JavaScript/TypeScript content and extract code chunks."""
|
|
32
31
|
if not content.strip():
|
|
33
32
|
return []
|
|
34
33
|
|
|
35
34
|
return await self._regex_parse(content, file_path)
|
|
36
35
|
|
|
37
|
-
async def _regex_parse(self, content: str, file_path: Path) ->
|
|
36
|
+
async def _regex_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
|
|
38
37
|
"""Parse JavaScript/TypeScript using regex patterns."""
|
|
39
38
|
chunks = []
|
|
40
39
|
lines = self._split_into_lines(content)
|
|
@@ -42,25 +41,43 @@ class JavaScriptParser(BaseParser):
|
|
|
42
41
|
# JavaScript/TypeScript patterns
|
|
43
42
|
function_patterns = [
|
|
44
43
|
re.compile(r"^\s*function\s+(\w+)\s*\(", re.MULTILINE), # function name()
|
|
45
|
-
re.compile(
|
|
46
|
-
|
|
47
|
-
|
|
44
|
+
re.compile(
|
|
45
|
+
r"^\s*const\s+(\w+)\s*=\s*\([^)]*\)\s*=>\s*{", re.MULTILINE
|
|
46
|
+
), # const name = () => {
|
|
47
|
+
re.compile(
|
|
48
|
+
r"^\s*const\s+(\w+)\s*=\s*function\s*\(", re.MULTILINE
|
|
49
|
+
), # const name = function(
|
|
50
|
+
re.compile(
|
|
51
|
+
r"^\s*(\w+)\s*:\s*function\s*\(", re.MULTILINE
|
|
52
|
+
), # name: function(
|
|
48
53
|
re.compile(r"^\s*(\w+)\s*\([^)]*\)\s*{", re.MULTILINE), # name() { (method)
|
|
49
|
-
re.compile(
|
|
50
|
-
|
|
54
|
+
re.compile(
|
|
55
|
+
r"^\s*async\s+function\s+(\w+)\s*\(", re.MULTILINE
|
|
56
|
+
), # async function name()
|
|
57
|
+
re.compile(
|
|
58
|
+
r"^\s*async\s+(\w+)\s*\([^)]*\)\s*{", re.MULTILINE
|
|
59
|
+
), # async name() {
|
|
51
60
|
]
|
|
52
|
-
|
|
61
|
+
|
|
53
62
|
class_patterns = [
|
|
54
63
|
re.compile(r"^\s*class\s+(\w+)", re.MULTILINE), # class Name
|
|
55
|
-
re.compile(
|
|
56
|
-
|
|
64
|
+
re.compile(
|
|
65
|
+
r"^\s*export\s+class\s+(\w+)", re.MULTILINE
|
|
66
|
+
), # export class Name
|
|
67
|
+
re.compile(
|
|
68
|
+
r"^\s*export\s+default\s+class\s+(\w+)", re.MULTILINE
|
|
69
|
+
), # export default class Name
|
|
57
70
|
]
|
|
58
|
-
|
|
71
|
+
|
|
59
72
|
interface_patterns = [
|
|
60
|
-
re.compile(
|
|
61
|
-
|
|
73
|
+
re.compile(
|
|
74
|
+
r"^\s*interface\s+(\w+)", re.MULTILINE
|
|
75
|
+
), # interface Name (TypeScript)
|
|
76
|
+
re.compile(
|
|
77
|
+
r"^\s*export\s+interface\s+(\w+)", re.MULTILINE
|
|
78
|
+
), # export interface Name
|
|
62
79
|
]
|
|
63
|
-
|
|
80
|
+
|
|
64
81
|
import_pattern = re.compile(r"^\s*(import|export).*", re.MULTILINE)
|
|
65
82
|
|
|
66
83
|
# Extract imports
|
|
@@ -73,17 +90,17 @@ class JavaScriptParser(BaseParser):
|
|
|
73
90
|
for pattern in function_patterns:
|
|
74
91
|
for match in pattern.finditer(content):
|
|
75
92
|
function_name = match.group(1)
|
|
76
|
-
start_line = content[:match.start()].count("\n") + 1
|
|
77
|
-
|
|
93
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
94
|
+
|
|
78
95
|
# Find end of function
|
|
79
96
|
end_line = self._find_block_end(lines, start_line, "{", "}")
|
|
80
|
-
|
|
97
|
+
|
|
81
98
|
func_content = self._get_line_range(lines, start_line, end_line)
|
|
82
|
-
|
|
99
|
+
|
|
83
100
|
if func_content.strip():
|
|
84
101
|
# Extract JSDoc comment
|
|
85
102
|
jsdoc = self._extract_jsdoc(lines, start_line)
|
|
86
|
-
|
|
103
|
+
|
|
87
104
|
chunk = self._create_chunk(
|
|
88
105
|
content=func_content,
|
|
89
106
|
file_path=file_path,
|
|
@@ -100,17 +117,17 @@ class JavaScriptParser(BaseParser):
|
|
|
100
117
|
for pattern in class_patterns:
|
|
101
118
|
for match in pattern.finditer(content):
|
|
102
119
|
class_name = match.group(1)
|
|
103
|
-
start_line = content[:match.start()].count("\n") + 1
|
|
104
|
-
|
|
120
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
121
|
+
|
|
105
122
|
# Find end of class
|
|
106
123
|
end_line = self._find_block_end(lines, start_line, "{", "}")
|
|
107
|
-
|
|
124
|
+
|
|
108
125
|
class_content = self._get_line_range(lines, start_line, end_line)
|
|
109
|
-
|
|
126
|
+
|
|
110
127
|
if class_content.strip():
|
|
111
128
|
# Extract JSDoc comment
|
|
112
129
|
jsdoc = self._extract_jsdoc(lines, start_line)
|
|
113
|
-
|
|
130
|
+
|
|
114
131
|
chunk = self._create_chunk(
|
|
115
132
|
content=class_content,
|
|
116
133
|
file_path=file_path,
|
|
@@ -128,17 +145,19 @@ class JavaScriptParser(BaseParser):
|
|
|
128
145
|
for pattern in interface_patterns:
|
|
129
146
|
for match in pattern.finditer(content):
|
|
130
147
|
interface_name = match.group(1)
|
|
131
|
-
start_line = content[:match.start()].count("\n") + 1
|
|
132
|
-
|
|
148
|
+
start_line = content[: match.start()].count("\n") + 1
|
|
149
|
+
|
|
133
150
|
# Find end of interface
|
|
134
151
|
end_line = self._find_block_end(lines, start_line, "{", "}")
|
|
135
|
-
|
|
136
|
-
interface_content = self._get_line_range(
|
|
137
|
-
|
|
152
|
+
|
|
153
|
+
interface_content = self._get_line_range(
|
|
154
|
+
lines, start_line, end_line
|
|
155
|
+
)
|
|
156
|
+
|
|
138
157
|
if interface_content.strip():
|
|
139
158
|
# Extract JSDoc comment
|
|
140
159
|
jsdoc = self._extract_jsdoc(lines, start_line)
|
|
141
|
-
|
|
160
|
+
|
|
142
161
|
chunk = self._create_chunk(
|
|
143
162
|
content=interface_content,
|
|
144
163
|
file_path=file_path,
|
|
@@ -165,64 +184,71 @@ class JavaScriptParser(BaseParser):
|
|
|
165
184
|
|
|
166
185
|
return chunks
|
|
167
186
|
|
|
168
|
-
def _find_block_end(
|
|
187
|
+
def _find_block_end(
|
|
188
|
+
self, lines: list[str], start_line: int, open_char: str, close_char: str
|
|
189
|
+
) -> int:
|
|
169
190
|
"""Find the end of a block by matching braces."""
|
|
170
191
|
if start_line > len(lines):
|
|
171
192
|
return len(lines)
|
|
172
|
-
|
|
193
|
+
|
|
173
194
|
brace_count = 0
|
|
174
195
|
found_opening = False
|
|
175
|
-
|
|
196
|
+
|
|
176
197
|
for i in range(start_line - 1, len(lines)):
|
|
177
198
|
line = lines[i]
|
|
178
|
-
|
|
199
|
+
|
|
179
200
|
for char in line:
|
|
180
201
|
if char == open_char:
|
|
181
202
|
brace_count += 1
|
|
182
203
|
found_opening = True
|
|
183
204
|
elif char == close_char:
|
|
184
205
|
brace_count -= 1
|
|
185
|
-
|
|
206
|
+
|
|
186
207
|
if found_opening and brace_count == 0:
|
|
187
208
|
return i + 1 # Return 1-based line number
|
|
188
|
-
|
|
209
|
+
|
|
189
210
|
return len(lines)
|
|
190
211
|
|
|
191
|
-
def _extract_jsdoc(self, lines:
|
|
212
|
+
def _extract_jsdoc(self, lines: list[str], start_line: int) -> str | None:
|
|
192
213
|
"""Extract JSDoc comment before a function/class."""
|
|
193
214
|
if start_line <= 1:
|
|
194
215
|
return None
|
|
195
|
-
|
|
216
|
+
|
|
196
217
|
# Look backwards for JSDoc comment
|
|
197
218
|
for i in range(start_line - 2, max(-1, start_line - 10), -1):
|
|
198
219
|
line = lines[i].strip()
|
|
199
|
-
|
|
220
|
+
|
|
200
221
|
if line.endswith("*/"):
|
|
201
222
|
# Found end of JSDoc, collect the comment
|
|
202
223
|
jsdoc_lines = []
|
|
203
224
|
for j in range(i, -1, -1):
|
|
204
225
|
comment_line = lines[j].strip()
|
|
205
226
|
jsdoc_lines.insert(0, comment_line)
|
|
206
|
-
|
|
227
|
+
|
|
207
228
|
if comment_line.startswith("/**"):
|
|
208
229
|
# Found start of JSDoc
|
|
209
230
|
# Clean up the comment
|
|
210
231
|
cleaned_lines = []
|
|
211
232
|
for line in jsdoc_lines:
|
|
212
233
|
# Remove /** */ and * prefixes
|
|
213
|
-
cleaned =
|
|
234
|
+
cleaned = (
|
|
235
|
+
line.replace("/**", "")
|
|
236
|
+
.replace("*/", "")
|
|
237
|
+
.replace("*", "")
|
|
238
|
+
.strip()
|
|
239
|
+
)
|
|
214
240
|
if cleaned:
|
|
215
241
|
cleaned_lines.append(cleaned)
|
|
216
|
-
|
|
242
|
+
|
|
217
243
|
return " ".join(cleaned_lines) if cleaned_lines else None
|
|
218
|
-
|
|
244
|
+
|
|
219
245
|
# If we hit non-comment code, stop looking
|
|
220
246
|
elif line and not line.startswith("//") and not line.startswith("*"):
|
|
221
247
|
break
|
|
222
|
-
|
|
248
|
+
|
|
223
249
|
return None
|
|
224
250
|
|
|
225
|
-
def get_supported_extensions(self) ->
|
|
251
|
+
def get_supported_extensions(self) -> list[str]:
|
|
226
252
|
"""Get supported file extensions."""
|
|
227
253
|
if self.language == "typescript":
|
|
228
254
|
return [".ts", ".tsx"]
|