mcp-vector-search 0.15.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,186 @@
1
+ """Text file parser for MCP Vector Search."""
2
+
3
+ from pathlib import Path
4
+
5
+ from ..config.constants import TEXT_CHUNK_SIZE
6
+ from ..core.models import CodeChunk
7
+ from .base import BaseParser
8
+
9
+
10
class TextParser(BaseParser):
    """Parser for plain text and markdown files (.txt, .md, .markdown)."""

    def __init__(self) -> None:
        """Register this parser under the "text" language name."""
        super().__init__("text")

    async def parse_file(self, file_path: Path) -> list[CodeChunk]:
        """Read a text file (UTF-8) and break it into chunks.

        Args:
            file_path: Path to the text file.

        Returns:
            List of text chunks; empty when the file cannot be read
            or parsed (best-effort, never raises).
        """
        try:
            text = file_path.read_text(encoding="utf-8")
            return await self.parse_content(text, file_path)
        except Exception:
            # Unreadable/undecodable files simply contribute no chunks.
            return []

    async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
        """Split text content into semantic chunks.

        Paragraph-based chunking is attempted first for better semantic
        coherence; when the text yields no usable paragraphs, fixed-size
        windows of whole lines are used instead.

        Args:
            content: Text content to parse.
            file_path: Path to the source file.

        Returns:
            List of text chunks (possibly empty).
        """
        if not content.strip():
            return []

        paragraphs = self._extract_paragraphs(content)
        if paragraphs:
            return [
                self._create_chunk(
                    content=para["content"],
                    file_path=file_path,
                    start_line=para["start_line"],
                    end_line=para["end_line"],
                    chunk_type="text",
                )
                for para in paragraphs
            ]

        # Fallback: fixed windows of TEXT_CHUNK_SIZE lines for text that
        # has no blank-line paragraph structure.
        lines = content.splitlines(keepends=True)
        result: list[CodeChunk] = []
        for offset in range(0, len(lines), TEXT_CHUNK_SIZE):
            last = min(offset + TEXT_CHUNK_SIZE, len(lines))
            window = "".join(lines[offset:last])
            if not window.strip():
                continue  # skip whitespace-only windows
            result.append(
                self._create_chunk(
                    content=window,
                    file_path=file_path,
                    start_line=offset + 1,
                    end_line=last,
                    chunk_type="text",
                )
            )
        return result

    def _extract_paragraphs(self, content: str) -> list[dict]:
        """Collect paragraphs: blank-line separated runs of non-empty lines.

        Paragraphs whose stripped text is 20 characters or shorter are
        discarded; survivors are merged into more substantial chunks.

        Args:
            content: Text content.

        Returns:
            List of dicts with "content", "start_line" and "end_line"
            keys, or an empty list when nothing qualifies.
        """
        lines = content.splitlines(keepends=True)
        found: list[dict] = []
        buffer: list[str] = []
        first_line = 1

        for lineno, text in enumerate(lines, start=1):
            if not text.strip():
                # A blank line terminates any open paragraph.
                if buffer:
                    body = "".join(buffer)
                    if len(body.strip()) > 20:  # minimum paragraph size
                        found.append(
                            {
                                "content": body,
                                "start_line": first_line,
                                "end_line": lineno - 1,
                            }
                        )
                    buffer = []
                continue
            if not buffer:
                first_line = lineno
            buffer.append(text)

        # Trailing paragraph with no closing blank line.
        if buffer:
            body = "".join(buffer)
            if len(body.strip()) > 20:
                found.append(
                    {
                        "content": body,
                        "start_line": first_line,
                        "end_line": len(lines),
                    }
                )

        if not found:
            return []
        return self._merge_small_paragraphs(found)

    def _merge_small_paragraphs(
        self, paragraphs: list[dict], target_size: int = 200
    ) -> list[dict]:
        """Combine adjacent small paragraphs into larger chunks.

        Consecutive paragraphs are joined (newline-separated) while the
        accumulated text stays under ``2 * target_size`` characters.

        Args:
            paragraphs: Paragraph dictionaries in source order.
            target_size: Target merged-chunk size in characters.

        Returns:
            List of merged paragraph dictionaries.
        """
        merged: list[dict] = []
        pending: dict | None = None

        for para in paragraphs:
            if pending is None:
                pending = dict(para)
                continue
            if len(pending["content"]) + len(para["content"]) < target_size * 2:
                # Still under budget: absorb this paragraph.
                pending["content"] = pending["content"] + "\n" + para["content"]
                pending["end_line"] = para["end_line"]
            else:
                # Budget reached: emit (if substantial) and start fresh.
                if len(pending["content"].strip()) > 20:
                    merged.append(pending)
                pending = dict(para)

        if pending is not None and len(pending["content"].strip()) > 20:
            merged.append(pending)

        return merged

    def get_supported_extensions(self) -> list[str]:
        """Return the file extensions this parser accepts."""
        return [".txt", ".md", ".markdown"]
@@ -0,0 +1,265 @@
1
+ """Shared utilities for language parsers.
2
+
3
+ This module contains common functionality used across multiple parsers
4
+ to reduce code duplication and improve maintainability.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from re import Pattern
9
+
10
+ from ..config.constants import DEFAULT_CHUNK_SIZE
11
+ from ..core.models import CodeChunk
12
+
13
+
14
def split_into_lines(content: str) -> list[str]:
    """Break content into lines, keeping each line's own terminator.

    Args:
        content: Text to split.

    Returns:
        Lines with their original endings (LF, CRLF, ...) preserved.
    """
    return content.splitlines(keepends=True)
25
+
26
+
27
def get_line_range(lines: list[str], start_line: int, end_line: int) -> str:
    """Join the lines between start_line and end_line (inclusive).

    Args:
        lines: All lines of the file.
        start_line: First line to include (1-indexed; clamped at 1).
        end_line: Last line to include (1-indexed; clamped to the list).

    Returns:
        The concatenated slice as a single string.
    """
    lo = start_line - 1
    if lo < 0:
        lo = 0
    hi = end_line if end_line < len(lines) else len(lines)
    return "".join(lines[lo:hi])
43
+
44
+
45
def find_block_end(lines: list[str], start_line: int, indent_char: str = " ") -> int:
    """Locate the end of an indentation-delimited block.

    Scans forward from the starting line for the first non-blank line
    whose indentation is not deeper than the starting line's; the block
    ends on the line just before it.

    Args:
        lines: All lines of the file.
        start_line: First line of the block (1-indexed).
        indent_char: Unused; retained for backward compatibility.

    Returns:
        The block's last line number (1-indexed); ``len(lines)`` when the
        block runs to end of file.
    """
    total = len(lines)
    if start_line > total:
        return total

    first = lines[start_line - 1]
    base_indent = len(first) - len(first.lstrip())

    for idx in range(start_line, total):
        candidate = lines[idx]
        if not candidate.strip():
            continue  # blank lines never terminate a block
        if len(candidate) - len(candidate.lstrip()) <= base_indent:
            # idx is 0-indexed, so it equals the 1-indexed number of the
            # previous line — exactly the last line inside the block.
            return idx

    return total
75
+
76
+
77
def create_simple_chunks(
    content: str, file_path: Path, chunk_size: int = DEFAULT_CHUNK_SIZE
) -> list[CodeChunk]:
    """Chunk content into fixed-size windows of whole lines.

    Fallback strategy used when no language-aware parsing is available.

    Args:
        content: File content.
        file_path: Path to the source file (stored on each chunk).
        chunk_size: Number of lines per chunk.

    Returns:
        Chunks of type "block"; whitespace-only windows are skipped.
    """
    lines = split_into_lines(content)
    result: list[CodeChunk] = []

    for offset in range(0, len(lines), chunk_size):
        first = offset + 1
        last = min(offset + chunk_size, len(lines))
        text = get_line_range(lines, first, last)
        if not text.strip():
            continue  # nothing worth indexing in this window
        result.append(
            CodeChunk(
                content=text,
                start_line=first,
                end_line=last,
                file_path=str(file_path),
                chunk_type="block",
                metadata={"source": "simple_chunking"},
            )
        )

    return result
114
+
115
+
116
+ def extract_docstring(lines: list[str], start_line: int) -> str | None:
117
+ """Extract docstring/comment block starting from a given line.
118
+
119
+ Supports Python docstrings (triple quotes), JavaDoc (/** */),
120
+ and hash-based comments (# or //).
121
+
122
+ Args:
123
+ lines: List of lines
124
+ start_line: Line number to start looking (1-indexed)
125
+
126
+ Returns:
127
+ Docstring content or None if not found
128
+ """
129
+ if start_line > len(lines):
130
+ return None
131
+
132
+ start_idx = start_line - 1
133
+
134
+ # Check for Python-style docstring
135
+ triple_double = '"""'
136
+ triple_single = "'''"
137
+ for quote in [triple_double, triple_single]:
138
+ if quote in lines[start_idx]:
139
+ # Multi-line docstring
140
+ docstring_lines = []
141
+ in_docstring = False
142
+
143
+ for line in lines[start_idx:]:
144
+ if quote in line:
145
+ if in_docstring:
146
+ # End of docstring
147
+ docstring_lines.append(line[: line.index(quote) + 3])
148
+ break
149
+ else:
150
+ # Start of docstring
151
+ in_docstring = True
152
+ docstring_lines.append(line)
153
+ if line.count(quote) >= 2:
154
+ # Single-line docstring
155
+ break
156
+ elif in_docstring:
157
+ docstring_lines.append(line)
158
+
159
+ if docstring_lines:
160
+ return "".join(docstring_lines).strip()
161
+
162
+ # Check for JavaDoc-style comment
163
+ if start_idx > 0 and "/**" in lines[start_idx - 1]:
164
+ comment_lines = []
165
+ for i in range(start_idx - 1, -1, -1):
166
+ comment_lines.insert(0, lines[i])
167
+ if "/**" in lines[i]:
168
+ break
169
+
170
+ for i in range(start_idx, len(lines)):
171
+ if "*/" in lines[i]:
172
+ comment_lines.append(lines[i])
173
+ break
174
+ comment_lines.append(lines[i])
175
+
176
+ return "".join(comment_lines).strip()
177
+
178
+ # Check for hash/slash comments on previous lines
179
+ comment_lines = []
180
+ for i in range(start_idx - 1, -1, -1):
181
+ line = lines[i].strip()
182
+ if line.startswith("#") or line.startswith("//"):
183
+ comment_lines.insert(0, lines[i])
184
+ elif line:
185
+ break
186
+
187
+ if comment_lines:
188
+ return "".join(comment_lines).strip()
189
+
190
+ return None
191
+
192
+
193
def extract_imports_with_pattern(
    content: str, pattern: Pattern[str], chunk_type: str = "import"
) -> list[str]:
    """Collect import-like statements matched by a regex pattern.

    Args:
        content: Source code to scan.
        pattern: Compiled regex; each full match is one statement.
        chunk_type: Unused; retained for backward compatibility.

    Returns:
        The stripped text of every match, in source order.
    """
    return [match.group(0).strip() for match in pattern.finditer(content)]
211
+
212
+
213
def find_code_blocks_with_patterns(
    content: str, lines: list[str], patterns: dict[str, Pattern[str]], file_path: Path
) -> list[CodeChunk]:
    """Locate code blocks (functions, classes, ...) via regex patterns.

    Generic fallback parser configurable with per-language patterns.

    Args:
        content: Full source text.
        lines: The same text pre-split into lines (endings preserved).
        patterns: Maps a block type (e.g. "function") to a compiled regex
            whose first capturing group, if any, is the block's name.
        file_path: Path to the source file (stored on each chunk).

    Returns:
        One chunk per regex match with non-blank content.
    """
    chunks: list[CodeChunk] = []

    for block_type, pattern in patterns.items():
        for match in pattern.finditer(content):
            # First capturing group is the declared name, when present.
            name = match.group(1) if match.groups() else "unknown"

            # Translate the match's character offset into a 1-indexed line.
            start_line = content.count("\n", 0, match.start()) + 1

            # Indentation heuristic marks where the block ends.
            end_line = find_block_end(lines, start_line)

            block_text = get_line_range(lines, start_line, end_line)
            if not block_text.strip():
                continue

            chunks.append(
                CodeChunk(
                    content=block_text,
                    start_line=start_line,
                    end_line=end_line,
                    file_path=str(file_path),
                    chunk_type=block_type,
                    metadata={
                        "name": name,
                        # Docstring is expected just after the header line.
                        "docstring": extract_docstring(lines, start_line + 1),
                        "source": "regex_fallback",
                    },
                )
            )

    return chunks
@@ -0,0 +1 @@
1
+ # PEP 561 marker file for type information
@@ -0,0 +1,42 @@
1
+ """Utility modules for MCP Vector Search."""
2
+
3
+ from .gitignore import (
4
+ GitignoreParser,
5
+ GitignorePattern,
6
+ create_gitignore_parser,
7
+ is_path_gitignored,
8
+ )
9
+ from .gitignore_updater import ensure_gitignore_entry
10
+ from .timing import (
11
+ PerformanceProfiler,
12
+ SearchProfiler,
13
+ TimingResult,
14
+ get_global_profiler,
15
+ print_global_report,
16
+ time_async_block,
17
+ time_block,
18
+ time_function,
19
+ )
20
+ from .version import get_user_agent, get_version_info, get_version_string
21
+
22
+ __all__ = [
23
+ # Gitignore utilities
24
+ "GitignoreParser",
25
+ "GitignorePattern",
26
+ "create_gitignore_parser",
27
+ "is_path_gitignored",
28
+ "ensure_gitignore_entry",
29
+ # Timing utilities
30
+ "PerformanceProfiler",
31
+ "TimingResult",
32
+ "time_function",
33
+ "time_block",
34
+ "time_async_block",
35
+ "get_global_profiler",
36
+ "print_global_report",
37
+ "SearchProfiler",
38
+ # Version utilities
39
+ "get_version_info",
40
+ "get_version_string",
41
+ "get_user_agent",
42
+ ]