mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (49) hide show
  1. mcp_vector_search/__init__.py +3 -2
  2. mcp_vector_search/cli/commands/auto_index.py +397 -0
  3. mcp_vector_search/cli/commands/config.py +88 -40
  4. mcp_vector_search/cli/commands/index.py +198 -52
  5. mcp_vector_search/cli/commands/init.py +472 -58
  6. mcp_vector_search/cli/commands/install.py +284 -0
  7. mcp_vector_search/cli/commands/mcp.py +495 -0
  8. mcp_vector_search/cli/commands/search.py +241 -87
  9. mcp_vector_search/cli/commands/status.py +184 -58
  10. mcp_vector_search/cli/commands/watch.py +34 -35
  11. mcp_vector_search/cli/didyoumean.py +184 -0
  12. mcp_vector_search/cli/export.py +320 -0
  13. mcp_vector_search/cli/history.py +292 -0
  14. mcp_vector_search/cli/interactive.py +342 -0
  15. mcp_vector_search/cli/main.py +163 -26
  16. mcp_vector_search/cli/output.py +63 -45
  17. mcp_vector_search/config/defaults.py +50 -36
  18. mcp_vector_search/config/settings.py +49 -35
  19. mcp_vector_search/core/auto_indexer.py +298 -0
  20. mcp_vector_search/core/connection_pool.py +322 -0
  21. mcp_vector_search/core/database.py +335 -25
  22. mcp_vector_search/core/embeddings.py +73 -29
  23. mcp_vector_search/core/exceptions.py +19 -2
  24. mcp_vector_search/core/factory.py +310 -0
  25. mcp_vector_search/core/git_hooks.py +345 -0
  26. mcp_vector_search/core/indexer.py +237 -73
  27. mcp_vector_search/core/models.py +21 -19
  28. mcp_vector_search/core/project.py +73 -58
  29. mcp_vector_search/core/scheduler.py +330 -0
  30. mcp_vector_search/core/search.py +574 -86
  31. mcp_vector_search/core/watcher.py +48 -46
  32. mcp_vector_search/mcp/__init__.py +4 -0
  33. mcp_vector_search/mcp/__main__.py +25 -0
  34. mcp_vector_search/mcp/server.py +701 -0
  35. mcp_vector_search/parsers/base.py +30 -31
  36. mcp_vector_search/parsers/javascript.py +74 -48
  37. mcp_vector_search/parsers/python.py +57 -49
  38. mcp_vector_search/parsers/registry.py +47 -32
  39. mcp_vector_search/parsers/text.py +179 -0
  40. mcp_vector_search/utils/__init__.py +40 -0
  41. mcp_vector_search/utils/gitignore.py +229 -0
  42. mcp_vector_search/utils/timing.py +334 -0
  43. mcp_vector_search/utils/version.py +47 -0
  44. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
  45. mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
  46. mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
  47. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
  48. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
  49. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,6 @@
2
2
 
3
3
  import re
4
4
  from pathlib import Path
5
- from typing import List, Optional
6
5
 
7
6
  from loguru import logger
8
7
 
@@ -23,19 +22,22 @@ class PythonParser(BaseParser):
23
22
  def _initialize_parser(self) -> None:
24
23
  """Initialize Tree-sitter parser for Python."""
25
24
  try:
26
- # Try the tree-sitter-languages package first
27
- import tree_sitter_languages
25
+ # Try the tree-sitter-language-pack package (maintained alternative)
26
+ from tree_sitter_language_pack import get_language, get_parser
28
27
 
29
- self._language = tree_sitter_languages.get_language("python")
30
- self._parser = tree_sitter_languages.get_parser("python")
31
- logger.debug("Python Tree-sitter parser initialized via tree-sitter-languages")
28
+ # Get the language and parser objects
29
+ self._language = get_language("python")
30
+ self._parser = get_parser("python")
31
+
32
+ logger.debug(
33
+ "Python Tree-sitter parser initialized via tree-sitter-language-pack"
34
+ )
32
35
  return
33
36
  except Exception as e:
34
- logger.debug(f"tree-sitter-languages failed: {e}")
37
+ logger.debug(f"tree-sitter-language-pack failed: {e}")
35
38
 
36
39
  try:
37
40
  # Fallback to manual tree-sitter setup (requires language binaries)
38
- import tree_sitter
39
41
 
40
42
  # This would require language binaries to be available
41
43
  # For now, we'll skip this and rely on fallback parsing
@@ -47,19 +49,21 @@ class PythonParser(BaseParser):
47
49
  self._parser = None
48
50
  self._language = None
49
51
 
50
- logger.info("Using fallback regex-based parsing for Python (Tree-sitter unavailable)")
52
+ logger.info(
53
+ "Using fallback regex-based parsing for Python (Tree-sitter unavailable)"
54
+ )
51
55
 
52
- async def parse_file(self, file_path: Path) -> List[CodeChunk]:
56
+ async def parse_file(self, file_path: Path) -> list[CodeChunk]:
53
57
  """Parse a Python file and extract code chunks."""
54
58
  try:
55
- with open(file_path, "r", encoding="utf-8") as f:
59
+ with open(file_path, encoding="utf-8") as f:
56
60
  content = f.read()
57
61
  return await self.parse_content(content, file_path)
58
62
  except Exception as e:
59
63
  logger.error(f"Failed to read file {file_path}: {e}")
60
64
  return []
61
65
 
62
- async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
66
+ async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
63
67
  """Parse Python content and extract code chunks."""
64
68
  if not content.strip():
65
69
  return []
@@ -78,7 +82,7 @@ class PythonParser(BaseParser):
78
82
 
79
83
  def _extract_chunks_from_tree(
80
84
  self, tree, content: str, file_path: Path
81
- ) -> List[CodeChunk]:
85
+ ) -> list[CodeChunk]:
82
86
  """Extract code chunks from Tree-sitter AST."""
83
87
  chunks = []
84
88
  lines = self._split_into_lines(content)
@@ -94,7 +98,7 @@ class PythonParser(BaseParser):
94
98
  elif node_type == "class_definition":
95
99
  class_chunks = self._extract_class(node, lines, file_path)
96
100
  chunks.extend(class_chunks)
97
-
101
+
98
102
  # Visit class methods with class context
99
103
  class_name = self._get_node_name(node)
100
104
  for child in node.children:
@@ -104,7 +108,7 @@ class PythonParser(BaseParser):
104
108
  module_chunk = self._extract_module_chunk(node, lines, file_path)
105
109
  if module_chunk:
106
110
  chunks.append(module_chunk)
107
-
111
+
108
112
  # Visit all children
109
113
  for child in node.children:
110
114
  visit_node(child)
@@ -115,7 +119,7 @@ class PythonParser(BaseParser):
115
119
 
116
120
  # Start traversal from root
117
121
  visit_node(tree.root_node)
118
-
122
+
119
123
  # If no specific chunks found, create a single chunk for the whole file
120
124
  if not chunks:
121
125
  chunks.append(
@@ -131,21 +135,21 @@ class PythonParser(BaseParser):
131
135
  return chunks
132
136
 
133
137
  def _extract_function(
134
- self, node, lines: List[str], file_path: Path, class_name: Optional[str] = None
135
- ) -> List[CodeChunk]:
138
+ self, node, lines: list[str], file_path: Path, class_name: str | None = None
139
+ ) -> list[CodeChunk]:
136
140
  """Extract function definition as a chunk."""
137
141
  chunks = []
138
-
142
+
139
143
  function_name = self._get_node_name(node)
140
144
  start_line = node.start_point[0] + 1
141
145
  end_line = node.end_point[0] + 1
142
-
146
+
143
147
  # Get function content
144
148
  content = self._get_line_range(lines, start_line, end_line)
145
-
149
+
146
150
  # Extract docstring if present
147
151
  docstring = self._extract_docstring(node, lines)
148
-
152
+
149
153
  chunk = self._create_chunk(
150
154
  content=content,
151
155
  file_path=file_path,
@@ -157,23 +161,25 @@ class PythonParser(BaseParser):
157
161
  docstring=docstring,
158
162
  )
159
163
  chunks.append(chunk)
160
-
164
+
161
165
  return chunks
162
166
 
163
- def _extract_class(self, node, lines: List[str], file_path: Path) -> List[CodeChunk]:
167
+ def _extract_class(
168
+ self, node, lines: list[str], file_path: Path
169
+ ) -> list[CodeChunk]:
164
170
  """Extract class definition as a chunk."""
165
171
  chunks = []
166
-
172
+
167
173
  class_name = self._get_node_name(node)
168
174
  start_line = node.start_point[0] + 1
169
175
  end_line = node.end_point[0] + 1
170
-
176
+
171
177
  # Get class content
172
178
  content = self._get_line_range(lines, start_line, end_line)
173
-
179
+
174
180
  # Extract docstring if present
175
181
  docstring = self._extract_docstring(node, lines)
176
-
182
+
177
183
  chunk = self._create_chunk(
178
184
  content=content,
179
185
  file_path=file_path,
@@ -184,23 +190,23 @@ class PythonParser(BaseParser):
184
190
  docstring=docstring,
185
191
  )
186
192
  chunks.append(chunk)
187
-
193
+
188
194
  return chunks
189
195
 
190
196
  def _extract_module_chunk(
191
- self, node, lines: List[str], file_path: Path
192
- ) -> Optional[CodeChunk]:
197
+ self, node, lines: list[str], file_path: Path
198
+ ) -> CodeChunk | None:
193
199
  """Extract module-level code (imports, constants, etc.)."""
194
200
  # Look for module-level statements (not inside functions/classes)
195
201
  module_lines = []
196
-
202
+
197
203
  for child in node.children:
198
204
  if child.type in ["import_statement", "import_from_statement"]:
199
205
  start_line = child.start_point[0] + 1
200
206
  end_line = child.end_point[0] + 1
201
207
  import_content = self._get_line_range(lines, start_line, end_line)
202
208
  module_lines.append(import_content.strip())
203
-
209
+
204
210
  if module_lines:
205
211
  content = "\n".join(module_lines)
206
212
  return self._create_chunk(
@@ -210,17 +216,17 @@ class PythonParser(BaseParser):
210
216
  end_line=len(module_lines),
211
217
  chunk_type="imports",
212
218
  )
213
-
219
+
214
220
  return None
215
221
 
216
- def _get_node_name(self, node) -> Optional[str]:
222
+ def _get_node_name(self, node) -> str | None:
217
223
  """Extract name from a named node (function, class, etc.)."""
218
224
  for child in node.children:
219
225
  if child.type == "identifier":
220
226
  return child.text.decode("utf-8")
221
227
  return None
222
228
 
223
- def _extract_docstring(self, node, lines: List[str]) -> Optional[str]:
229
+ def _extract_docstring(self, node, lines: list[str]) -> str | None:
224
230
  """Extract docstring from a function or class node."""
225
231
  # Look for string literal as first statement in body
226
232
  for child in node.children:
@@ -232,7 +238,9 @@ class PythonParser(BaseParser):
232
238
  # Extract string content
233
239
  start_line = expr_child.start_point[0] + 1
234
240
  end_line = expr_child.end_point[0] + 1
235
- docstring = self._get_line_range(lines, start_line, end_line)
241
+ docstring = self._get_line_range(
242
+ lines, start_line, end_line
243
+ )
236
244
  # Clean up docstring (remove quotes)
237
245
  return self._clean_docstring(docstring)
238
246
  return None
@@ -244,7 +252,7 @@ class PythonParser(BaseParser):
244
252
  cleaned = re.sub(r'^["\']|["\']$', "", cleaned.strip())
245
253
  return cleaned.strip()
246
254
 
247
- async def _fallback_parse(self, content: str, file_path: Path) -> List[CodeChunk]:
255
+ async def _fallback_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
248
256
  """Fallback parsing using regex when Tree-sitter is not available."""
249
257
  chunks = []
250
258
  lines = self._split_into_lines(content)
@@ -259,13 +267,13 @@ class PythonParser(BaseParser):
259
267
  for match in import_pattern.finditer(content):
260
268
  import_line = match.group(0).strip()
261
269
  imports.append(import_line)
262
-
270
+
263
271
  # Find functions
264
272
  for match in function_pattern.finditer(content):
265
273
  function_name = match.group(1)
266
274
  # Find the actual line with 'def' by looking for it in the match
267
275
  match_text = match.group(0)
268
- def_pos_in_match = match_text.find('def')
276
+ def_pos_in_match = match_text.find("def")
269
277
  actual_def_pos = match.start() + def_pos_in_match
270
278
  start_line = content[:actual_def_pos].count("\n") + 1
271
279
 
@@ -289,13 +297,13 @@ class PythonParser(BaseParser):
289
297
  )
290
298
  chunk.imports = imports # Add imports to chunk
291
299
  chunks.append(chunk)
292
-
300
+
293
301
  # Find classes
294
302
  for match in class_pattern.finditer(content):
295
303
  class_name = match.group(1)
296
304
  # Find the actual line with 'class' by looking for it in the match
297
305
  match_text = match.group(0)
298
- class_pos_in_match = match_text.find('class')
306
+ class_pos_in_match = match_text.find("class")
299
307
  actual_class_pos = match.start() + class_pos_in_match
300
308
  start_line = content[:actual_class_pos].count("\n") + 1
301
309
 
@@ -319,7 +327,7 @@ class PythonParser(BaseParser):
319
327
  )
320
328
  chunk.imports = imports # Add imports to chunk
321
329
  chunks.append(chunk)
322
-
330
+
323
331
  # If no functions or classes found, create chunks for the whole file
324
332
  if not chunks:
325
333
  chunks.append(
@@ -331,10 +339,10 @@ class PythonParser(BaseParser):
331
339
  chunk_type="module",
332
340
  )
333
341
  )
334
-
342
+
335
343
  return chunks
336
344
 
337
- def _find_function_end(self, lines: List[str], start_line: int) -> int:
345
+ def _find_function_end(self, lines: list[str], start_line: int) -> int:
338
346
  """Find the end line of a function using indentation."""
339
347
  if start_line > len(lines):
340
348
  return len(lines)
@@ -359,11 +367,11 @@ class PythonParser(BaseParser):
359
367
  # If we reach here, the function goes to the end of the file
360
368
  return len(lines)
361
369
 
362
- def _find_class_end(self, lines: List[str], start_line: int) -> int:
370
+ def _find_class_end(self, lines: list[str], start_line: int) -> int:
363
371
  """Find the end line of a class using indentation."""
364
372
  return self._find_function_end(lines, start_line)
365
373
 
366
- def _extract_docstring_regex(self, content: str) -> Optional[str]:
374
+ def _extract_docstring_regex(self, content: str) -> str | None:
367
375
  """Extract docstring using regex patterns."""
368
376
  # Look for triple-quoted strings at the beginning of the content
369
377
  # after the def/class line
@@ -397,11 +405,11 @@ class PythonParser(BaseParser):
397
405
  return " ".join(docstring_lines).strip()
398
406
 
399
407
  # If we hit non-docstring code, stop looking
400
- if line and not line.startswith('#'):
408
+ if line and not line.startswith("#"):
401
409
  break
402
410
 
403
411
  return None
404
412
 
405
- def get_supported_extensions(self) -> List[str]:
413
+ def get_supported_extensions(self) -> list[str]:
406
414
  """Get supported file extensions."""
407
415
  return [".py", ".pyw"]
@@ -1,13 +1,13 @@
1
1
  """Parser registry for MCP Vector Search."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import Dict, List, Optional
5
4
 
6
5
  from loguru import logger
7
6
 
8
7
  from .base import BaseParser, FallbackParser
9
- from .python import PythonParser
10
8
  from .javascript import JavaScriptParser, TypeScriptParser
9
+ from .python import PythonParser
10
+ from .text import TextParser
11
11
 
12
12
 
13
13
  class ParserRegistry:
@@ -15,10 +15,16 @@ class ParserRegistry:
15
15
 
16
16
  def __init__(self) -> None:
17
17
  """Initialize parser registry."""
18
- self._parsers: Dict[str, BaseParser] = {}
19
- self._extension_map: Dict[str, str] = {}
18
+ self._parsers: dict[str, BaseParser] = {}
19
+ self._extension_map: dict[str, str] = {}
20
20
  self._fallback_parser = FallbackParser()
21
- self._register_default_parsers()
21
+ self._initialized = False
22
+
23
+ def _ensure_initialized(self) -> None:
24
+ """Ensure parsers are initialized (lazy initialization)."""
25
+ if not self._initialized:
26
+ self._register_default_parsers()
27
+ self._initialized = True
22
28
 
23
29
  def _register_default_parsers(self) -> None:
24
30
  """Register default parsers for supported languages."""
@@ -33,72 +39,79 @@ class ParserRegistry:
33
39
  # Register TypeScript parser
34
40
  typescript_parser = TypeScriptParser()
35
41
  self.register_parser("typescript", typescript_parser)
42
+
43
+ # Register Text parser for .txt files
44
+ text_parser = TextParser()
45
+ self.register_parser("text", text_parser)
36
46
 
37
47
  def register_parser(self, language: str, parser: BaseParser) -> None:
38
48
  """Register a parser for a specific language.
39
-
49
+
40
50
  Args:
41
51
  language: Language name
42
52
  parser: Parser instance
43
53
  """
44
54
  self._parsers[language] = parser
45
-
55
+
46
56
  # Map file extensions to language
47
57
  for ext in parser.get_supported_extensions():
48
58
  if ext != "*": # Skip fallback marker
49
59
  self._extension_map[ext.lower()] = language
50
-
60
+
51
61
  logger.debug(f"Registered parser for {language}: {parser.__class__.__name__}")
52
62
 
53
63
  def get_parser(self, file_extension: str) -> BaseParser:
54
64
  """Get parser for a file extension.
55
-
65
+
56
66
  Args:
57
67
  file_extension: File extension (including dot)
58
-
68
+
59
69
  Returns:
60
70
  Parser instance (fallback parser if no specific parser found)
61
71
  """
72
+ self._ensure_initialized()
62
73
  language = self._extension_map.get(file_extension.lower())
63
74
  if language and language in self._parsers:
64
75
  return self._parsers[language]
65
-
76
+
66
77
  # Return fallback parser for unsupported extensions
67
78
  return self._fallback_parser
68
79
 
69
80
  def get_parser_for_file(self, file_path: Path) -> BaseParser:
70
81
  """Get parser for a specific file.
71
-
82
+
72
83
  Args:
73
84
  file_path: Path to the file
74
-
85
+
75
86
  Returns:
76
87
  Parser instance
77
88
  """
78
89
  return self.get_parser(file_path.suffix)
79
90
 
80
- def get_supported_languages(self) -> List[str]:
91
+ def get_supported_languages(self) -> list[str]:
81
92
  """Get list of supported languages.
82
-
93
+
83
94
  Returns:
84
95
  List of language names
85
96
  """
97
+ self._ensure_initialized()
86
98
  return list(self._parsers.keys())
87
99
 
88
- def get_supported_extensions(self) -> List[str]:
100
+ def get_supported_extensions(self) -> list[str]:
89
101
  """Get list of supported file extensions.
90
-
102
+
91
103
  Returns:
92
104
  List of file extensions
93
105
  """
106
+ self._ensure_initialized()
94
107
  return list(self._extension_map.keys())
95
108
 
96
109
  def is_supported(self, file_extension: str) -> bool:
97
110
  """Check if a file extension is supported.
98
-
111
+
99
112
  Args:
100
113
  file_extension: File extension to check
101
-
114
+
102
115
  Returns:
103
116
  True if supported (always True due to fallback parser)
104
117
  """
@@ -106,37 +119,39 @@ class ParserRegistry:
106
119
 
107
120
  def get_language_for_extension(self, file_extension: str) -> str:
108
121
  """Get language name for a file extension.
109
-
122
+
110
123
  Args:
111
124
  file_extension: File extension
112
-
125
+
113
126
  Returns:
114
127
  Language name (or "text" for unsupported extensions)
115
128
  """
129
+ self._ensure_initialized()
116
130
  return self._extension_map.get(file_extension.lower(), "text")
117
131
 
118
- def get_parser_info(self) -> Dict[str, Dict[str, any]]:
132
+ def get_parser_info(self) -> dict[str, dict[str, any]]:
119
133
  """Get information about registered parsers.
120
-
134
+
121
135
  Returns:
122
136
  Dictionary with parser information
123
137
  """
138
+ self._ensure_initialized()
124
139
  info = {}
125
-
140
+
126
141
  for language, parser in self._parsers.items():
127
142
  info[language] = {
128
143
  "class": parser.__class__.__name__,
129
144
  "extensions": parser.get_supported_extensions(),
130
145
  "language": parser.language,
131
146
  }
132
-
147
+
133
148
  # Add fallback parser info
134
149
  info["fallback"] = {
135
150
  "class": self._fallback_parser.__class__.__name__,
136
151
  "extensions": ["*"],
137
152
  "language": self._fallback_parser.language,
138
153
  }
139
-
154
+
140
155
  return info
141
156
 
142
157
 
@@ -146,7 +161,7 @@ _registry = ParserRegistry()
146
161
 
147
162
  def get_parser_registry() -> ParserRegistry:
148
163
  """Get the global parser registry instance.
149
-
164
+
150
165
  Returns:
151
166
  Parser registry instance
152
167
  """
@@ -155,7 +170,7 @@ def get_parser_registry() -> ParserRegistry:
155
170
 
156
171
  def register_parser(language: str, parser: BaseParser) -> None:
157
172
  """Register a parser in the global registry.
158
-
173
+
159
174
  Args:
160
175
  language: Language name
161
176
  parser: Parser instance
@@ -165,10 +180,10 @@ def register_parser(language: str, parser: BaseParser) -> None:
165
180
 
166
181
  def get_parser(file_extension: str) -> BaseParser:
167
182
  """Get parser for a file extension from the global registry.
168
-
183
+
169
184
  Args:
170
185
  file_extension: File extension
171
-
186
+
172
187
  Returns:
173
188
  Parser instance
174
189
  """
@@ -177,10 +192,10 @@ def get_parser(file_extension: str) -> BaseParser:
177
192
 
178
193
  def get_parser_for_file(file_path: Path) -> BaseParser:
179
194
  """Get parser for a file from the global registry.
180
-
195
+
181
196
  Args:
182
197
  file_path: File path
183
-
198
+
184
199
  Returns:
185
200
  Parser instance
186
201
  """