mcp-vector-search 0.7.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff shows the contents of publicly released versions of the package as they appear in their public registries, and is provided for informational purposes only.
- mcp_vector_search/__init__.py +2 -2
- mcp_vector_search/cli/commands/visualize.py +523 -0
- mcp_vector_search/cli/main.py +16 -11
- mcp_vector_search/core/indexer.py +72 -2
- mcp_vector_search/core/models.py +45 -1
- mcp_vector_search/parsers/base.py +83 -0
- mcp_vector_search/parsers/javascript.py +350 -2
- mcp_vector_search/parsers/python.py +79 -0
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.0.dist-info}/METADATA +1 -1
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.0.dist-info}/RECORD +13 -12
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.0.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.0.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.0.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/models.py
CHANGED
@@ -21,12 +21,40 @@ class CodeChunk:
     class_name: str | None = None
     docstring: str | None = None
     imports: list[str] = None
+
+    # Enhancement 1: Complexity scoring
     complexity_score: float = 0.0
 
+    # Enhancement 3: Hierarchical relationships
+    chunk_id: str | None = None
+    parent_chunk_id: str | None = None
+    child_chunk_ids: list[str] = None
+    chunk_depth: int = 0
+
+    # Enhancement 4: Enhanced metadata
+    decorators: list[str] = None
+    parameters: list[dict] = None
+    return_type: str | None = None
+    type_annotations: dict[str, str] = None
+
     def __post_init__(self) -> None:
-        """Initialize default values."""
+        """Initialize default values and generate chunk ID."""
         if self.imports is None:
             self.imports = []
+        if self.child_chunk_ids is None:
+            self.child_chunk_ids = []
+        if self.decorators is None:
+            self.decorators = []
+        if self.parameters is None:
+            self.parameters = []
+        if self.type_annotations is None:
+            self.type_annotations = {}
+
+        # Generate chunk ID if not provided
+        if self.chunk_id is None:
+            import hashlib
+            id_string = f"{self.file_path}:{self.chunk_type}:{self.start_line}:{self.end_line}"
+            self.chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
 
     @property
     def id(self) -> str:
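The generated chunk_id is deterministic: the same file path, chunk type, and line span always hash to the same 16-character prefix, so re-indexing an unchanged file reproduces the same IDs. A minimal sketch of the same computation outside the dataclass (the path and line numbers are illustrative):

import hashlib

id_string = "src/app.py:function:10:24"  # f"{file_path}:{chunk_type}:{start_line}:{end_line}"
chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
print(chunk_id)  # 16 hex characters, stable across runs for the same inputs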
@@ -52,6 +80,14 @@ class CodeChunk:
             "docstring": self.docstring,
             "imports": self.imports,
             "complexity_score": self.complexity_score,
+            "chunk_id": self.chunk_id,
+            "parent_chunk_id": self.parent_chunk_id,
+            "child_chunk_ids": self.child_chunk_ids,
+            "chunk_depth": self.chunk_depth,
+            "decorators": self.decorators,
+            "parameters": self.parameters,
+            "return_type": self.return_type,
+            "type_annotations": self.type_annotations,
         }
 
     @classmethod
@@ -69,6 +105,14 @@ class CodeChunk:
             docstring=data.get("docstring"),
             imports=data.get("imports", []),
             complexity_score=data.get("complexity_score", 0.0),
+            chunk_id=data.get("chunk_id"),
+            parent_chunk_id=data.get("parent_chunk_id"),
+            child_chunk_ids=data.get("child_chunk_ids", []),
+            chunk_depth=data.get("chunk_depth", 0),
+            decorators=data.get("decorators", []),
+            parameters=data.get("parameters", []),
+            return_type=data.get("return_type"),
+            type_annotations=data.get("type_annotations", {}),
         )
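Because to_dict and from_dict were extended symmetrically, a serialization round trip should preserve the new fields. A small sketch, assuming CodeChunk's remaining constructor fields (content, file_path, start_line, end_line, chunk_type) match the names referenced in __post_init__ and to_dict; argument values are illustrative:

chunk = CodeChunk(
    content="def ping(): ...",
    file_path="src/app.py",
    start_line=1,
    end_line=1,
    chunk_type="function",
)
data = chunk.to_dict()            # now includes chunk_id, parameters, decorators, ...
restored = CodeChunk.from_dict(data)
assert restored.chunk_id == chunk.chunk_id
assert restored.type_annotations == {}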
mcp_vector_search/parsers/base.py
CHANGED
@@ -64,6 +64,68 @@ class BaseParser(ABC):
         """
         ...
 
+    def _calculate_complexity(self, node, language: str | None = None) -> float:
+        """Calculate cyclomatic complexity from AST node.
+
+        Cyclomatic complexity = Number of decision points + 1
+
+        Args:
+            node: AST node (tree-sitter)
+            language: Programming language for language-specific patterns (defaults to self.language)
+
+        Returns:
+            Complexity score (1.0 = simple, 10+ = complex)
+        """
+        if language is None:
+            language = self.language
+
+        if not hasattr(node, 'children'):
+            return 1.0
+
+        complexity = 1.0  # Base complexity
+
+        # Language-specific decision node types
+        decision_nodes = {
+            "python": {
+                "if_statement", "elif_clause", "while_statement", "for_statement",
+                "except_clause", "with_statement", "conditional_expression",
+                "boolean_operator"  # and, or
+            },
+            "javascript": {
+                "if_statement", "while_statement", "for_statement", "for_in_statement",
+                "switch_case", "catch_clause", "conditional_expression", "ternary_expression"
+            },
+            "typescript": {
+                "if_statement", "while_statement", "for_statement", "for_in_statement",
+                "switch_case", "catch_clause", "conditional_expression", "ternary_expression"
+            },
+            "dart": {
+                "if_statement", "while_statement", "for_statement", "for_in_statement",
+                "switch_case", "catch_clause", "conditional_expression"
+            },
+            "php": {
+                "if_statement", "elseif_clause", "while_statement", "foreach_statement",
+                "for_statement", "switch_case", "catch_clause", "ternary_expression"
+            },
+            "ruby": {
+                "if", "unless", "while", "until", "for", "case", "rescue",
+                "conditional"
+            }
+        }
+
+        nodes_to_count = decision_nodes.get(language, decision_nodes.get("python", set()))
+
+        def count_decision_points(n):
+            nonlocal complexity
+            if hasattr(n, 'type') and n.type in nodes_to_count:
+                complexity += 1
+            if hasattr(n, 'children'):
+                for child in n.children:
+                    count_decision_points(child)
+
+        count_decision_points(node)
+        return complexity
+
     def _create_chunk(
         self,
         content: str,
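A quick worked example of the scoring with the Python node set above: one for loop, one if statement, and one boolean operator give three decision points, so the function scores 1.0 (base) + 3 = 4.0.

def risky(values):
    total = 0
    for v in values:            # for_statement -> +1
        if v > 0 and v < 100:   # if_statement +1, boolean_operator ("and") +1
            total += v
    return total

# _calculate_complexity(<tree-sitter node for risky>, "python") should yield 4.0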
@@ -74,6 +136,13 @@ class BaseParser(ABC):
         function_name: str | None = None,
         class_name: str | None = None,
         docstring: str | None = None,
+        complexity_score: float = 0.0,
+        decorators: list[str] | None = None,
+        parameters: list[dict] | None = None,
+        return_type: str | None = None,
+        chunk_id: str | None = None,
+        parent_chunk_id: str | None = None,
+        chunk_depth: int = 0,
     ) -> CodeChunk:
         """Create a code chunk with metadata.
 
@@ -86,6 +155,13 @@ class BaseParser(ABC):
             function_name: Function name if applicable
             class_name: Class name if applicable
             docstring: Docstring if applicable
+            complexity_score: Cyclomatic complexity score
+            decorators: List of decorators/annotations
+            parameters: List of function parameters with metadata
+            return_type: Return type annotation
+            chunk_id: Unique chunk identifier
+            parent_chunk_id: Parent chunk ID for hierarchical relationships
+            chunk_depth: Nesting level in code hierarchy
 
         Returns:
             CodeChunk instance
@@ -100,6 +176,13 @@ class BaseParser(ABC):
             function_name=function_name,
             class_name=class_name,
             docstring=docstring,
+            complexity_score=complexity_score,
+            decorators=decorators or [],
+            parameters=parameters or [],
+            return_type=return_type,
+            chunk_id=chunk_id,
+            parent_chunk_id=parent_chunk_id,
+            chunk_depth=chunk_depth,
         )
 
     def _split_into_lines(self, content: str) -> list[str]:
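One small design note on the new keyword arguments: `decorators or []` and `parameters or []` give each chunk its own fresh list when the caller passes nothing, so chunks never share a mutable default. A hedged sketch of how a parser subclass might call the extended helper (all argument values are illustrative):

chunk = self._create_chunk(
    content=method_source,
    file_path=path,
    start_line=42,
    end_line=58,
    chunk_type="method",
    function_name="save",
    class_name="UserRepository",
    docstring="Persist the user.",
    complexity_score=3.0,
    decorators=["@classmethod"],
    parameters=[{"name": "user", "type": "User", "default": None}],
    chunk_depth=2,
)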
mcp_vector_search/parsers/javascript.py
CHANGED
@@ -10,11 +10,32 @@ from .base import BaseParser
 
 
 class JavaScriptParser(BaseParser):
-    """JavaScript
+    """JavaScript parser with tree-sitter AST support and fallback regex parsing."""
 
     def __init__(self, language: str = "javascript") -> None:
         """Initialize JavaScript parser."""
         super().__init__(language)
+        self._parser = None
+        self._language = None
+        self._use_tree_sitter = False
+        self._initialize_parser()
+
+    def _initialize_parser(self) -> None:
+        """Initialize Tree-sitter parser for JavaScript."""
+        try:
+            from tree_sitter_language_pack import get_language, get_parser
+
+            self._language = get_language("javascript")
+            self._parser = get_parser("javascript")
+
+            logger.debug(
+                "JavaScript Tree-sitter parser initialized via tree-sitter-language-pack"
+            )
+            self._use_tree_sitter = True
+            return
+        except Exception as e:
+            logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")
+            self._use_tree_sitter = False
 
     async def parse_file(self, file_path: Path) -> list[CodeChunk]:
         """Parse a JavaScript/TypeScript file and extract code chunks."""
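For context on the new code path, here is a minimal standalone sketch of the tree-sitter API the parser now tries first; it assumes the optional tree-sitter-language-pack package is installed and mirrors the parse call used in parse_file below:

from tree_sitter_language_pack import get_parser

parser = get_parser("javascript")
tree = parser.parse(b"function add(a, b) { return a + b; }")
root = tree.root_node
print(root.type)                                 # "program"
print([child.type for child in root.children])   # e.g. ["function_declaration"]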
@@ -31,7 +52,317 @@ class JavaScriptParser(BaseParser):
         if not content.strip():
             return []
 
-
+        if self._use_tree_sitter:
+            try:
+                tree = self._parser.parse(content.encode('utf-8'))
+                return self._extract_chunks_from_tree(tree, content, file_path)
+            except Exception as e:
+                logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")
+                return await self._regex_parse(content, file_path)
+        else:
+            return await self._regex_parse(content, file_path)
+
+    def _extract_chunks_from_tree(
+        self, tree, content: str, file_path: Path
+    ) -> list[CodeChunk]:
+        """Extract code chunks from JavaScript AST."""
+        chunks = []
+        lines = self._split_into_lines(content)
+
+        def visit_node(node, current_class=None):
+            """Recursively visit AST nodes."""
+            node_type = node.type
+
+            if node_type == "function_declaration":
+                chunks.extend(self._extract_function(node, lines, file_path, current_class))
+            elif node_type == "arrow_function":
+                chunks.extend(self._extract_arrow_function(node, lines, file_path, current_class))
+            elif node_type == "class_declaration":
+                class_chunks = self._extract_class(node, lines, file_path)
+                chunks.extend(class_chunks)
+
+                # Visit class methods
+                class_name = self._get_node_name(node)
+                for child in node.children:
+                    visit_node(child, class_name)
+            elif node_type == "method_definition":
+                chunks.extend(self._extract_method(node, lines, file_path, current_class))
+            elif node_type == "lexical_declaration":
+                # const/let declarations might be arrow functions
+                chunks.extend(self._extract_variable_function(node, lines, file_path, current_class))
+
+            # Recurse into children
+            if hasattr(node, 'children'):
+                for child in node.children:
+                    if child.type not in ("class_declaration", "function_declaration"):
+                        visit_node(child, current_class)
+
+        visit_node(tree.root_node)
+
+        # If no specific chunks found, create a single chunk for the whole file
+        if not chunks:
+            chunks.append(
+                self._create_chunk(
+                    content=content,
+                    file_path=file_path,
+                    start_line=1,
+                    end_line=len(lines),
+                    chunk_type="module",
+                )
+            )
+
+        return chunks
+
+    def _extract_function(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract function declaration from AST."""
+        function_name = self._get_node_name(node)
+        if not function_name:
+            return []
+
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+
+        # Extract parameters
+        parameters = self._extract_js_parameters(node)
+
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="function",
+            function_name=function_name,
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            parameters=parameters,
+            chunk_depth=2 if class_name else 1,
+        )
+        return [chunk]
+
+    def _extract_arrow_function(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract arrow function from AST."""
+        # Arrow functions often don't have explicit names, try to get from parent
+        parent = getattr(node, 'parent', None)
+        function_name = None
+
+        if parent and parent.type == "variable_declarator":
+            function_name = self._get_node_name(parent)
+
+        if not function_name:
+            return []
+
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+
+        # Extract parameters
+        parameters = self._extract_js_parameters(node)
+
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="function",
+            function_name=function_name,
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            parameters=parameters,
+            chunk_depth=2 if class_name else 1,
+        )
+        return [chunk]
+
+    def _extract_variable_function(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract function from variable declaration (const func = ...)."""
+        chunks = []
+
+        for child in node.children:
+            if child.type == "variable_declarator":
+                # Check if it's a function assignment
+                for subchild in child.children:
+                    if subchild.type in ("arrow_function", "function"):
+                        func_name = self._get_node_name(child)
+                        if func_name:
+                            start_line = child.start_point[0] + 1
+                            end_line = child.end_point[0] + 1
+
+                            content = self._get_line_range(lines, start_line, end_line)
+                            docstring = self._extract_jsdoc_from_node(child, lines)
+
+                            # Calculate complexity
+                            complexity = self._calculate_complexity(subchild, "javascript")
+
+                            # Extract parameters
+                            parameters = self._extract_js_parameters(subchild)
+
+                            chunk = self._create_chunk(
+                                content=content,
+                                file_path=file_path,
+                                start_line=start_line,
+                                end_line=end_line,
+                                chunk_type="function",
+                                function_name=func_name,
+                                class_name=class_name,
+                                docstring=docstring,
+                                complexity_score=complexity,
+                                parameters=parameters,
+                                chunk_depth=2 if class_name else 1,
+                            )
+                            chunks.append(chunk)
+
+        return chunks
+
+    def _extract_class(
+        self, node, lines: list[str], file_path: Path
+    ) -> list[CodeChunk]:
+        """Extract class declaration from AST."""
+        class_name = self._get_node_name(node)
+        if not class_name:
+            return []
+
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="class",
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            chunk_depth=1,
+        )
+        return [chunk]
+
+    def _extract_method(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract method definition from class."""
+        method_name = self._get_node_name(node)
+        if not method_name:
+            return []
+
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+
+        # Extract parameters
+        parameters = self._extract_js_parameters(node)
+
+        # Check for decorators (TypeScript)
+        decorators = self._extract_decorators_from_node(node)
+
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="method",
+            function_name=method_name,
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            parameters=parameters,
+            decorators=decorators,
+            chunk_depth=2,
+        )
+        return [chunk]
+
+    def _get_node_name(self, node) -> str | None:
+        """Extract name from a named node."""
+        for child in node.children:
+            if child.type in ("identifier", "property_identifier"):
+                return child.text.decode("utf-8")
+        return None
+
+    def _get_node_text(self, node) -> str:
+        """Get text content of a node."""
+        if hasattr(node, 'text'):
+            return node.text.decode('utf-8')
+        return ""
+
+    def _extract_js_parameters(self, node) -> list[dict]:
+        """Extract function parameters from JavaScript/TypeScript AST."""
+        parameters = []
+
+        for child in node.children:
+            if child.type == "formal_parameters":
+                for param_node in child.children:
+                    if param_node.type in ("identifier", "required_parameter", "optional_parameter", "rest_parameter"):
+                        param_info = {
+                            "name": None,
+                            "type": None,
+                            "default": None
+                        }
+
+                        # Extract parameter details
+                        if param_node.type == "identifier":
+                            param_info["name"] = self._get_node_text(param_node)
+                        else:
+                            # TypeScript typed parameters
+                            for subchild in param_node.children:
+                                if subchild.type == "identifier":
+                                    param_info["name"] = self._get_node_text(subchild)
+                                elif subchild.type == "type_annotation":
+                                    param_info["type"] = self._get_node_text(subchild)
+                                elif "default" in subchild.type or subchild.type == "number":
+                                    param_info["default"] = self._get_node_text(subchild)
+
+                        if param_info["name"] and param_info["name"] not in ("(", ")", ",", "..."):
+                            # Clean up rest parameters
+                            if param_info["name"].startswith("..."):
+                                param_info["name"] = param_info["name"][3:]
+                                param_info["rest"] = True
+                            parameters.append(param_info)
+
+        return parameters
+
+    def _extract_decorators_from_node(self, node) -> list[str]:
+        """Extract decorators from TypeScript node."""
+        decorators = []
+
+        for child in node.children:
+            if child.type == "decorator":
+                decorators.append(self._get_node_text(child))
+
+        return decorators
+
+    def _extract_jsdoc_from_node(self, node, lines: list[str]) -> str | None:
+        """Extract JSDoc comment from before a node."""
+        start_line = node.start_point[0]
+        return self._extract_jsdoc(lines, start_line + 1)
 
     async def _regex_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
         """Parse JavaScript/TypeScript using regex patterns."""
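A hedged end-to-end sketch of exercising the new tree-sitter path (the file name and source are made up; parse_file is assumed to read the file itself, as the truncated hunk above suggests, and the regex fallback should produce chunks of the same shape when tree-sitter-language-pack is unavailable):

import asyncio
from pathlib import Path

from mcp_vector_search.parsers.javascript import JavaScriptParser

async def main() -> None:
    src = Path("greeter.js")
    src.write_text("class Greeter {\n  greet(name) {\n    return `hi ${name}`;\n  }\n}\n")
    chunks = await JavaScriptParser().parse_file(src)
    for chunk in chunks:
        # Each chunk now carries complexity_score, parameters, and chunk_depth.
        print(chunk.chunk_type, chunk.class_name, chunk.function_name, chunk.complexity_score)

asyncio.run(main())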
@@ -262,3 +593,20 @@ class TypeScriptParser(JavaScriptParser):
     def __init__(self) -> None:
         """Initialize TypeScript parser."""
         super().__init__("typescript")
+
+    def _initialize_parser(self) -> None:
+        """Initialize Tree-sitter parser for TypeScript."""
+        try:
+            from tree_sitter_language_pack import get_language, get_parser
+
+            self._language = get_language("typescript")
+            self._parser = get_parser("typescript")
+
+            logger.debug(
+                "TypeScript Tree-sitter parser initialized via tree-sitter-language-pack"
+            )
+            self._use_tree_sitter = True
+            return
+        except Exception as e:
+            logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")
+            self._use_tree_sitter = False
mcp_vector_search/parsers/python.py
CHANGED
@@ -150,6 +150,18 @@ class PythonParser(BaseParser):
         # Extract docstring if present
         docstring = self._extract_docstring(node, lines)
 
+        # Enhancement 1: Calculate complexity
+        complexity = self._calculate_complexity(node, "python")
+
+        # Enhancement 4: Extract decorators
+        decorators = self._extract_decorators(node, lines)
+
+        # Enhancement 4: Extract parameters
+        parameters = self._extract_parameters(node)
+
+        # Enhancement 4: Extract return type
+        return_type = self._extract_return_type(node)
+
         chunk = self._create_chunk(
             content=content,
             file_path=file_path,
@@ -159,6 +171,11 @@ class PythonParser(BaseParser):
             function_name=function_name,
             class_name=class_name,
             docstring=docstring,
+            complexity_score=complexity,
+            decorators=decorators,
+            parameters=parameters,
+            return_type=return_type,
+            chunk_depth=2 if class_name else 1,
         )
         chunks.append(chunk)
 
@@ -180,6 +197,12 @@ class PythonParser(BaseParser):
         # Extract docstring if present
         docstring = self._extract_docstring(node, lines)
 
+        # Enhancement 1: Calculate complexity (for the entire class)
+        complexity = self._calculate_complexity(node, "python")
+
+        # Enhancement 4: Extract decorators
+        decorators = self._extract_decorators(node, lines)
+
         chunk = self._create_chunk(
             content=content,
             file_path=file_path,
@@ -188,6 +211,9 @@ class PythonParser(BaseParser):
             chunk_type="class",
             class_name=class_name,
             docstring=docstring,
+            complexity_score=complexity,
+            decorators=decorators,
+            chunk_depth=1,
         )
         chunks.append(chunk)
 
@@ -410,6 +436,59 @@ class PythonParser(BaseParser):
 
         return None
 
+    def _extract_decorators(self, node, lines: list[str]) -> list[str]:
+        """Extract decorator names from function/class node."""
+        decorators = []
+        for child in node.children:
+            if child.type == "decorator":
+                # Get decorator text (includes @ symbol)
+                dec_text = self._get_node_text(child).strip()
+                decorators.append(dec_text)
+        return decorators
+
+    def _extract_parameters(self, node) -> list[dict]:
+        """Extract function parameters with type annotations."""
+        parameters = []
+        for child in node.children:
+            if child.type == "parameters":
+                for param_node in child.children:
+                    if param_node.type in ("identifier", "typed_parameter", "default_parameter"):
+                        param_info = {
+                            "name": None,
+                            "type": None,
+                            "default": None
+                        }
+
+                        # Extract parameter name
+                        if param_node.type == "identifier":
+                            param_info["name"] = self._get_node_text(param_node)
+                        else:
+                            # For typed or default parameters, find the identifier
+                            for subchild in param_node.children:
+                                if subchild.type == "identifier":
+                                    param_info["name"] = self._get_node_text(subchild)
+                                elif subchild.type == "type":
+                                    param_info["type"] = self._get_node_text(subchild)
+                                elif "default" in subchild.type:
+                                    param_info["default"] = self._get_node_text(subchild)
+
+                        if param_info["name"] and param_info["name"] not in ("self", "cls", "(", ")", ","):
+                            parameters.append(param_info)
+        return parameters
+
+    def _extract_return_type(self, node) -> str | None:
+        """Extract return type annotation from function."""
+        for child in node.children:
+            if child.type == "type":
+                return self._get_node_text(child)
+        return None
+
+    def _get_node_text(self, node) -> str:
+        """Get text content of a node."""
+        if hasattr(node, 'text'):
+            return node.text.decode('utf-8')
+        return ""
+
     def get_supported_extensions(self) -> list[str]:
         """Get supported file extensions."""
         return [".py", ".pyw"]
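To make the intent of the new extraction concrete, here is the kind of metadata these helpers are meant to attach to a chunk for a small typed, decorated function. This is an illustrative shape, not verified parser output; exact values depend on tree-sitter node naming (for example, whether decorator text is read from the decorated definition node).

source = """
@lru_cache(maxsize=None)
def scale(value: float) -> float:
    return value * 2.0
"""

# Intended chunk metadata for scale():
#   decorators       -> ["@lru_cache(maxsize=None)"]
#   parameters       -> [{"name": "value", "type": "float", "default": None}]
#   return_type      -> "float"
#   complexity_score -> 1.0 (no decision points)
#   chunk_depth      -> 1 (top-level function)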
{mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-vector-search
-Version: 0.7.6
+Version: 0.8.0
 Summary: CLI-first semantic code search with MCP integration
 Project-URL: Homepage, https://github.com/bobmatnyc/mcp-vector-search
 Project-URL: Documentation, https://mcp-vector-search.readthedocs.io