mcp_vector_search-0.15.7-py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mcp-vector-search has been flagged as potentially problematic.
- mcp_vector_search/__init__.py +10 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/chat.py +534 -0
- mcp_vector_search/cli/commands/config.py +393 -0
- mcp_vector_search/cli/commands/demo.py +358 -0
- mcp_vector_search/cli/commands/index.py +762 -0
- mcp_vector_search/cli/commands/init.py +658 -0
- mcp_vector_search/cli/commands/install.py +869 -0
- mcp_vector_search/cli/commands/install_old.py +700 -0
- mcp_vector_search/cli/commands/mcp.py +1254 -0
- mcp_vector_search/cli/commands/reset.py +393 -0
- mcp_vector_search/cli/commands/search.py +796 -0
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +584 -0
- mcp_vector_search/cli/commands/uninstall.py +404 -0
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +265 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +201 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/commands/watch.py +287 -0
- mcp_vector_search/cli/didyoumean.py +520 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +295 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +484 -0
- mcp_vector_search/cli/output.py +414 -0
- mcp_vector_search/cli/suggestions.py +375 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +200 -0
- mcp_vector_search/config/settings.py +146 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/connection_pool.py +360 -0
- mcp_vector_search/core/database.py +1237 -0
- mcp_vector_search/core/directory_index.py +318 -0
- mcp_vector_search/core/embeddings.py +294 -0
- mcp_vector_search/core/exceptions.py +89 -0
- mcp_vector_search/core/factory.py +318 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +1002 -0
- mcp_vector_search/core/llm_client.py +453 -0
- mcp_vector_search/core/models.py +294 -0
- mcp_vector_search/core/project.py +350 -0
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +952 -0
- mcp_vector_search/core/watcher.py +322 -0
- mcp_vector_search/mcp/__init__.py +5 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +752 -0
- mcp_vector_search/parsers/__init__.py +8 -0
- mcp_vector_search/parsers/base.py +296 -0
- mcp_vector_search/parsers/dart.py +605 -0
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/javascript.py +643 -0
- mcp_vector_search/parsers/php.py +694 -0
- mcp_vector_search/parsers/python.py +502 -0
- mcp_vector_search/parsers/registry.py +223 -0
- mcp_vector_search/parsers/ruby.py +678 -0
- mcp_vector_search/parsers/text.py +186 -0
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search/utils/__init__.py +42 -0
- mcp_vector_search/utils/gitignore.py +250 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +339 -0
- mcp_vector_search/utils/timing.py +338 -0
- mcp_vector_search/utils/version.py +47 -0
- mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
- mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
- mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
- mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
- mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
mcp_vector_search/parsers/text.py
@@ -0,0 +1,186 @@
"""Text file parser for MCP Vector Search."""

from pathlib import Path

from ..config.constants import TEXT_CHUNK_SIZE
from ..core.models import CodeChunk
from .base import BaseParser


class TextParser(BaseParser):
    """Parser for plain text and markdown files (.txt, .md, .markdown)."""

    def __init__(self) -> None:
        """Initialize text parser."""
        super().__init__("text")

    async def parse_file(self, file_path: Path) -> list[CodeChunk]:
        """Parse a text file and extract chunks.

        Args:
            file_path: Path to the text file

        Returns:
            List of text chunks
        """
        try:
            with open(file_path, encoding="utf-8") as f:
                content = f.read()
            return await self.parse_content(content, file_path)
        except Exception:
            # Return empty list if file can't be read
            return []

    async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
        """Parse text content into semantic chunks.

        Uses paragraph-based chunking for better semantic coherence.
        Falls back to line-based chunking for non-paragraph text.

        Args:
            content: Text content to parse
            file_path: Path to the source file

        Returns:
            List of text chunks
        """
        if not content.strip():
            return []

        chunks = []
        lines = content.splitlines(keepends=True)

        # Try paragraph-based chunking first
        paragraphs = self._extract_paragraphs(content)

        if paragraphs:
            # Use paragraph-based chunking
            for para_info in paragraphs:
                chunk = self._create_chunk(
                    content=para_info["content"],
                    file_path=file_path,
                    start_line=para_info["start_line"],
                    end_line=para_info["end_line"],
                    chunk_type="text",
                )
                chunks.append(chunk)
        else:
            # Fall back to line-based chunking for non-paragraph text
            # Use smaller chunks for text files (30 lines instead of 50)
            chunk_size = TEXT_CHUNK_SIZE
            for i in range(0, len(lines), chunk_size):
                start_line = i + 1
                end_line = min(i + chunk_size, len(lines))

                chunk_content = "".join(lines[i:end_line])

                if chunk_content.strip():
                    chunk = self._create_chunk(
                        content=chunk_content,
                        file_path=file_path,
                        start_line=start_line,
                        end_line=end_line,
                        chunk_type="text",
                    )
                    chunks.append(chunk)

        return chunks

    def _extract_paragraphs(self, content: str) -> list[dict]:
        """Extract paragraphs from text content.

        A paragraph is defined as one or more non-empty lines
        separated by empty lines.

        Args:
            content: Text content

        Returns:
            List of paragraph info dictionaries
        """
        lines = content.splitlines(keepends=True)
        paragraphs = []
        current_para = []
        start_line = 1

        for i, line in enumerate(lines, 1):
            if line.strip():
                if not current_para:
                    start_line = i
                current_para.append(line)
            else:
                if current_para:
                    # End of paragraph
                    para_content = "".join(current_para)
                    if len(para_content.strip()) > 20:  # Minimum paragraph size
                        paragraphs.append(
                            {
                                "content": para_content,
                                "start_line": start_line,
                                "end_line": i - 1,
                            }
                        )
                    current_para = []

        # Handle last paragraph if exists
        if current_para:
            para_content = "".join(current_para)
            if len(para_content.strip()) > 20:
                paragraphs.append(
                    {
                        "content": para_content,
                        "start_line": start_line,
                        "end_line": len(lines),
                    }
                )

        # If we have very few paragraphs, merge small ones
        if paragraphs:
            merged = self._merge_small_paragraphs(paragraphs)
            return merged

        return []

    def _merge_small_paragraphs(
        self, paragraphs: list[dict], target_size: int = 200
    ) -> list[dict]:
        """Merge small paragraphs to create more substantial chunks.

        Args:
            paragraphs: List of paragraph dictionaries
            target_size: Target size for merged paragraphs in characters

        Returns:
            List of merged paragraph dictionaries
        """
        merged = []
        current_merge = None

        for para in paragraphs:
            para_len = len(para["content"])

            if current_merge is None:
                current_merge = para.copy()
            elif len(current_merge["content"]) + para_len < target_size * 2:
                # Merge with current
                current_merge["content"] += "\n" + para["content"]
                current_merge["end_line"] = para["end_line"]
            else:
                # Start new merge
                if len(current_merge["content"].strip()) > 20:
                    merged.append(current_merge)
                current_merge = para.copy()

        # Add last merge
        if current_merge and len(current_merge["content"].strip()) > 20:
            merged.append(current_merge)

        return merged

    def get_supported_extensions(self) -> list[str]:
        """Get list of supported file extensions.

        Returns:
            List of supported extensions
        """
        return [".txt", ".md", ".markdown"]
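For context, here is a minimal usage sketch of the parser above. The file name is hypothetical, and it assumes BaseParser._create_chunk (defined in parsers/base.py, not shown in this diff) builds CodeChunk objects that expose their constructor fields as attributes:

import asyncio
from pathlib import Path

from mcp_vector_search.parsers.text import TextParser


async def demo() -> None:
    parser = TextParser()
    text = (
        "First paragraph, long enough to clear the 20-character minimum.\n"
        "\n"
        "Second paragraph, also comfortably over the size threshold.\n"
    )
    # Paragraph-based chunking applies here; adjacent small paragraphs are
    # merged until they approach target_size * 2 (400 characters by default),
    # so this input yields a single chunk spanning lines 1-3.
    chunks = await parser.parse_content(text, Path("notes.md"))
    for chunk in chunks:
        # Assumes CodeChunk exposes start_line/end_line/chunk_type attributes.
        print(chunk.start_line, chunk.end_line, chunk.chunk_type)


asyncio.run(demo())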
mcp_vector_search/parsers/utils.py
@@ -0,0 +1,265 @@
"""Shared utilities for language parsers.

This module contains common functionality used across multiple parsers
to reduce code duplication and improve maintainability.
"""

from pathlib import Path
from re import Pattern

from ..config.constants import DEFAULT_CHUNK_SIZE
from ..core.models import CodeChunk


def split_into_lines(content: str) -> list[str]:
    """Split content into lines, handling different line endings.

    Args:
        content: Text content to split

    Returns:
        List of lines with line endings preserved
    """
    # Handle different line endings and preserve them
    return content.splitlines(keepends=True)


def get_line_range(lines: list[str], start_line: int, end_line: int) -> str:
    """Get content from a range of lines.

    Args:
        lines: List of lines
        start_line: Starting line number (1-indexed)
        end_line: Ending line number (1-indexed, inclusive)

    Returns:
        Joined content from the line range
    """
    # Convert to 0-indexed
    start_idx = max(0, start_line - 1)
    end_idx = min(len(lines), end_line)

    return "".join(lines[start_idx:end_idx])


def find_block_end(lines: list[str], start_line: int, indent_char: str = " ") -> int:
    """Find the end of a code block based on indentation.

    This is a simple heuristic that looks for the next line with equal or
    lower indentation level than the starting line.

    Args:
        lines: List of lines
        start_line: Starting line number (1-indexed)
        indent_char: Character used for indentation (space or tab)

    Returns:
        End line number (1-indexed)
    """
    if start_line > len(lines):
        return len(lines)

    # Get indentation of starting line
    start_idx = start_line - 1
    start_indent = len(lines[start_idx]) - len(lines[start_idx].lstrip())

    # Find next line with same or lower indentation
    for i in range(start_idx + 1, len(lines)):
        line = lines[i]
        if line.strip():  # Skip empty lines
            current_indent = len(line) - len(line.lstrip())
            if current_indent <= start_indent:
                return i  # Return 0-indexed position, will be used as end_line

    return len(lines)


def create_simple_chunks(
    content: str, file_path: Path, chunk_size: int = DEFAULT_CHUNK_SIZE
) -> list[CodeChunk]:
    """Create simple line-based chunks from content.

    This is a fallback chunking strategy when more sophisticated
    parsing is not available.

    Args:
        content: File content
        file_path: Path to source file
        chunk_size: Number of lines per chunk

    Returns:
        List of code chunks
    """
    lines = split_into_lines(content)
    chunks = []

    for i in range(0, len(lines), chunk_size):
        start_line = i + 1
        end_line = min(i + chunk_size, len(lines))

        chunk_content = get_line_range(lines, start_line, end_line)

        if chunk_content.strip():
            chunk = CodeChunk(
                content=chunk_content,
                start_line=start_line,
                end_line=end_line,
                file_path=str(file_path),
                chunk_type="block",
                metadata={"source": "simple_chunking"},
            )
            chunks.append(chunk)

    return chunks


def extract_docstring(lines: list[str], start_line: int) -> str | None:
    """Extract docstring/comment block starting from a given line.

    Supports Python docstrings (triple quotes), JavaDoc (/** */),
    and hash-based comments (# or //).

    Args:
        lines: List of lines
        start_line: Line number to start looking (1-indexed)

    Returns:
        Docstring content or None if not found
    """
    if start_line > len(lines):
        return None

    start_idx = start_line - 1

    # Check for Python-style docstring
    triple_double = '"""'
    triple_single = "'''"
    for quote in [triple_double, triple_single]:
        if quote in lines[start_idx]:
            # Multi-line docstring
            docstring_lines = []
            in_docstring = False

            for line in lines[start_idx:]:
                if quote in line:
                    if in_docstring:
                        # End of docstring
                        docstring_lines.append(line[: line.index(quote) + 3])
                        break
                    else:
                        # Start of docstring
                        in_docstring = True
                        docstring_lines.append(line)
                        if line.count(quote) >= 2:
                            # Single-line docstring
                            break
                elif in_docstring:
                    docstring_lines.append(line)

            if docstring_lines:
                return "".join(docstring_lines).strip()

    # Check for JavaDoc-style comment
    if start_idx > 0 and "/**" in lines[start_idx - 1]:
        comment_lines = []
        for i in range(start_idx - 1, -1, -1):
            comment_lines.insert(0, lines[i])
            if "/**" in lines[i]:
                break

        for i in range(start_idx, len(lines)):
            if "*/" in lines[i]:
                comment_lines.append(lines[i])
                break
            comment_lines.append(lines[i])

        return "".join(comment_lines).strip()

    # Check for hash/slash comments on previous lines
    comment_lines = []
    for i in range(start_idx - 1, -1, -1):
        line = lines[i].strip()
        if line.startswith("#") or line.startswith("//"):
            comment_lines.insert(0, lines[i])
        elif line:
            break

    if comment_lines:
        return "".join(comment_lines).strip()

    return None


def extract_imports_with_pattern(
    content: str, pattern: Pattern[str], chunk_type: str = "import"
) -> list[str]:
    """Extract import/require/use statements using a regex pattern.

    Args:
        content: Source code content
        pattern: Compiled regex pattern to match imports
        chunk_type: Type of import (import, require, use, etc.)

    Returns:
        List of import statements
    """
    imports = []
    for match in pattern.finditer(content):
        import_line = match.group(0).strip()
        imports.append(import_line)
    return imports


def find_code_blocks_with_patterns(
    content: str, lines: list[str], patterns: dict[str, Pattern[str]], file_path: Path
) -> list[CodeChunk]:
    """Find code blocks (functions, classes, etc.) using regex patterns.

    This is a generic fallback parser that can be configured with different
    patterns for different languages.

    Args:
        content: Source code content
        lines: Pre-split lines
        patterns: Dictionary mapping block types to compiled regex patterns
        file_path: Path to source file

    Returns:
        List of code chunks
    """
    chunks = []

    for block_type, pattern in patterns.items():
        for match in pattern.finditer(content):
            # Extract the name from the first capturing group
            name = match.group(1) if match.groups() else "unknown"

            # Find line number
            match_pos = match.start()
            start_line = content[:match_pos].count("\n") + 1

            # Find end of block using indentation
            end_line = find_block_end(lines, start_line)

            # Get block content
            block_content = get_line_range(lines, start_line, end_line)

            if block_content.strip():
                # Extract docstring
                docstring = extract_docstring(lines, start_line + 1)

                chunk = CodeChunk(
                    content=block_content,
                    start_line=start_line,
                    end_line=end_line,
                    file_path=str(file_path),
                    chunk_type=block_type,
                    metadata={
                        "name": name,
                        "docstring": docstring,
                        "source": "regex_fallback",
                    },
                )
                chunks.append(chunk)

    return chunks
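The regex-fallback path is easiest to see with a small, self-contained sketch. The patterns below are illustrative only; the package's real parsers ship their own per-language patterns:

import re
from pathlib import Path

from mcp_vector_search.parsers.utils import (
    extract_imports_with_pattern,
    find_code_blocks_with_patterns,
    split_into_lines,
)

source = (
    "import os\n"
    "\n"
    "def greet(name):\n"
    '    """Say hello."""\n'
    '    return f"Hello, {name}"\n'
)

# Hypothetical patterns for Python source.
import_pattern = re.compile(r"^(?:from\s+\S+\s+)?import\s+.+$", re.MULTILINE)
block_patterns = {"function": re.compile(r"^def\s+(\w+)", re.MULTILINE)}

print(extract_imports_with_pattern(source, import_pattern))  # ['import os']

chunks = find_code_blocks_with_patterns(
    source, split_into_lines(source), block_patterns, Path("demo.py")
)
for chunk in chunks:
    # find_block_end scans indentation to close the block, and
    # extract_docstring picks up the triple-quoted line after the def.
    print(chunk.chunk_type, chunk.metadata["name"], chunk.metadata["docstring"])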
mcp_vector_search/py.typed
@@ -0,0 +1 @@
# PEP 561 marker file for type information
mcp_vector_search/utils/__init__.py
@@ -0,0 +1,42 @@
"""Utility modules for MCP Vector Search."""

from .gitignore import (
    GitignoreParser,
    GitignorePattern,
    create_gitignore_parser,
    is_path_gitignored,
)
from .gitignore_updater import ensure_gitignore_entry
from .timing import (
    PerformanceProfiler,
    SearchProfiler,
    TimingResult,
    get_global_profiler,
    print_global_report,
    time_async_block,
    time_block,
    time_function,
)
from .version import get_user_agent, get_version_info, get_version_string

__all__ = [
    # Gitignore utilities
    "GitignoreParser",
    "GitignorePattern",
    "create_gitignore_parser",
    "is_path_gitignored",
    "ensure_gitignore_entry",
    # Timing utilities
    "PerformanceProfiler",
    "TimingResult",
    "time_function",
    "time_block",
    "time_async_block",
    "get_global_profiler",
    "print_global_report",
    "SearchProfiler",
    # Version utilities
    "get_version_info",
    "get_version_string",
    "get_user_agent",
]
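Since __all__ defines the public surface, the names above import directly from mcp_vector_search.utils. Only the import surface is shown here; call signatures (e.g. whether time_block is a context manager) are not visible in this diff:

# Grounded in the __all__ list above; signatures are not shown in this diff.
from mcp_vector_search.utils import (
    GitignoreParser,
    ensure_gitignore_entry,
    get_version_string,
    time_block,
)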