mcp-vector-search 0.15.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +10 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/chat.py +534 -0
- mcp_vector_search/cli/commands/config.py +393 -0
- mcp_vector_search/cli/commands/demo.py +358 -0
- mcp_vector_search/cli/commands/index.py +762 -0
- mcp_vector_search/cli/commands/init.py +658 -0
- mcp_vector_search/cli/commands/install.py +869 -0
- mcp_vector_search/cli/commands/install_old.py +700 -0
- mcp_vector_search/cli/commands/mcp.py +1254 -0
- mcp_vector_search/cli/commands/reset.py +393 -0
- mcp_vector_search/cli/commands/search.py +796 -0
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +584 -0
- mcp_vector_search/cli/commands/uninstall.py +404 -0
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +265 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +201 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/commands/watch.py +287 -0
- mcp_vector_search/cli/didyoumean.py +520 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +295 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +484 -0
- mcp_vector_search/cli/output.py +414 -0
- mcp_vector_search/cli/suggestions.py +375 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +200 -0
- mcp_vector_search/config/settings.py +146 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/connection_pool.py +360 -0
- mcp_vector_search/core/database.py +1237 -0
- mcp_vector_search/core/directory_index.py +318 -0
- mcp_vector_search/core/embeddings.py +294 -0
- mcp_vector_search/core/exceptions.py +89 -0
- mcp_vector_search/core/factory.py +318 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +1002 -0
- mcp_vector_search/core/llm_client.py +453 -0
- mcp_vector_search/core/models.py +294 -0
- mcp_vector_search/core/project.py +350 -0
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +952 -0
- mcp_vector_search/core/watcher.py +322 -0
- mcp_vector_search/mcp/__init__.py +5 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +752 -0
- mcp_vector_search/parsers/__init__.py +8 -0
- mcp_vector_search/parsers/base.py +296 -0
- mcp_vector_search/parsers/dart.py +605 -0
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/javascript.py +643 -0
- mcp_vector_search/parsers/php.py +694 -0
- mcp_vector_search/parsers/python.py +502 -0
- mcp_vector_search/parsers/registry.py +223 -0
- mcp_vector_search/parsers/ruby.py +678 -0
- mcp_vector_search/parsers/text.py +186 -0
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search/utils/__init__.py +42 -0
- mcp_vector_search/utils/gitignore.py +250 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +339 -0
- mcp_vector_search/utils/timing.py +338 -0
- mcp_vector_search/utils/version.py +47 -0
- mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
- mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
- mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
- mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
- mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,643 @@
|
|
|
1
|
+
"""JavaScript/TypeScript parser for MCP Vector Search."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
from ..core.models import CodeChunk
|
|
9
|
+
from .base import BaseParser
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JavaScriptParser(BaseParser):
|
|
13
|
+
"""JavaScript parser with tree-sitter AST support and fallback regex parsing."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, language: str = "javascript") -> None:
    """Set up parser state and attempt to load a Tree-sitter grammar.

    Args:
        language: Language label forwarded to the base parser constructor.
    """
    super().__init__(language)
    # Start in regex-fallback mode; _initialize_parser() turns the flag on
    # only when a grammar and parser were obtained successfully.
    self._parser = self._language = None
    self._use_tree_sitter = False
    self._initialize_parser()
|
|
22
|
+
|
|
23
|
+
def _initialize_parser(self) -> None:
    """Load the JavaScript grammar and parser from tree-sitter-language-pack.

    Any failure (missing package, missing grammar, ...) is logged at debug
    level and leaves the parser in regex-fallback mode.
    """
    grammar = "javascript"
    try:
        # Imported lazily so the module still loads without the package.
        from tree_sitter_language_pack import get_language, get_parser

        self._language = get_language(grammar)
        self._parser = get_parser(grammar)
        logger.debug(
            "JavaScript Tree-sitter parser initialized via tree-sitter-language-pack"
        )
    except Exception as exc:
        logger.debug(f"tree-sitter-language-pack failed: {exc}, using regex fallback")
        self._use_tree_sitter = False
    else:
        self._use_tree_sitter = True
|
|
39
|
+
|
|
40
|
+
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
    """Read a JavaScript/TypeScript file as UTF-8 and parse it into chunks.

    Args:
        file_path: Location of the source file.

    Returns:
        The chunks produced by ``parse_content``; an empty list when reading
        or parsing raised (the error is logged, not propagated).
    """
    chunks = []
    try:
        with open(file_path, encoding="utf-8") as handle:
            source_text = handle.read()
        chunks = await self.parse_content(source_text, file_path)
    except Exception as exc:
        logger.error(f"Failed to read file {file_path}: {exc}")
    return chunks
|
|
49
|
+
|
|
50
|
+
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Turn JavaScript/TypeScript source text into code chunks.

    Tree-sitter is used when it was initialised successfully; if it is
    unavailable or raises, the regex-based parser is used instead.

    Args:
        content: Full source text.
        file_path: Path recorded on the produced chunks.

    Returns:
        Extracted chunks; an empty list for blank content.
    """
    if not content.strip():
        return []

    if self._use_tree_sitter:
        try:
            syntax_tree = self._parser.parse(content.encode("utf-8"))
            return self._extract_chunks_from_tree(syntax_tree, content, file_path)
        except Exception as exc:
            logger.warning(f"Tree-sitter parsing failed for {file_path}: {exc}")

    # Reached when Tree-sitter is disabled or has just failed.
    return await self._regex_parse(content, file_path)
|
|
64
|
+
|
|
65
|
+
def _extract_chunks_from_tree(
    self, tree, content: str, file_path: Path
) -> list[CodeChunk]:
    """Walk a Tree-sitter syntax tree and collect code chunks.

    Function declarations, arrow functions, class declarations, methods and
    ``const``/``let`` function assignments are turned into chunks. A node's
    children are skipped only when the node itself produced at least one
    chunk; nodes that yield nothing (for example anonymous arrow-function
    callbacks) are descended into so named functions nested inside them are
    still found.

    Args:
        tree: Parsed tree exposing ``root_node``.
        content: Source text the tree was built from.
        file_path: Path recorded on the produced chunks.

    Returns:
        The extracted chunks, or a single ``"module"`` chunk spanning the
        whole file when nothing more specific was found.
    """
    chunks = []
    lines = self._split_into_lines(content)

    # Extractors sharing the (node, lines, file_path, class_name) signature.
    extractors = {
        "function_declaration": self._extract_function,
        "arrow_function": self._extract_arrow_function,
        "method_definition": self._extract_method,
        # const/let declarations might be arrow functions
        "lexical_declaration": self._extract_variable_function,
    }

    def visit_node(node, current_class=None):
        """Recursively visit AST nodes."""
        node_type = node.type
        extracted = False

        if node_type == "class_declaration":
            chunks.extend(self._extract_class(node, lines, file_path))

            # Visit the class members so methods carry the class name.
            class_name = self._get_node_name(node)
            for child in node.children:
                visit_node(child, class_name)
            extracted = True
        elif node_type in extractors:
            found = extractors[node_type](node, lines, file_path, current_class)
            if found:
                chunks.extend(found)
                # Only a node that actually produced a chunk stops the
                # descent; this still prevents double-extraction of arrow
                # functions that sit inside a variable declaration.
                extracted = True

        if not extracted:
            for child in getattr(node, "children", ()):
                visit_node(child, current_class)

    visit_node(tree.root_node)

    # If no specific chunks found, create a single chunk for the whole file
    if not chunks:
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
|
|
133
|
+
|
|
134
|
+
def _extract_function(
    self, node, lines: list[str], file_path: Path, class_name: str | None = None
) -> list[CodeChunk]:
    """Build a ``"function"`` chunk for a function declaration node.

    Args:
        node: The function declaration node.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.
        class_name: Enclosing class name, if any.

    Returns:
        A one-element list, or an empty list when the node has no name.
    """
    function_name = self._get_node_name(node)
    if not function_name:
        return []

    # start_point/end_point rows are incremented by one to obtain the line
    # numbers stored on the chunk.
    first_line, last_line = node.start_point[0] + 1, node.end_point[0] + 1

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="function",
            function_name=function_name,
            class_name=class_name,
            docstring=self._extract_jsdoc_from_node(node, lines),
            complexity_score=self._calculate_complexity(node, "javascript"),
            parameters=self._extract_js_parameters(node),
            # Depth 2 when nested in a class, otherwise 1.
            chunk_depth=2 if class_name else 1,
        )
    ]
|
|
168
|
+
|
|
169
|
+
def _extract_arrow_function(
    self, node, lines: list[str], file_path: Path, class_name: str | None = None
) -> list[CodeChunk]:
    """Build a ``"function"`` chunk for an arrow function bound to a variable.

    Arrow functions carry no name of their own, so the name is taken from a
    parent ``variable_declarator``. Arrow functions in any other position
    produce no chunk.

    Args:
        node: The arrow function node.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.
        class_name: Enclosing class name, if any.

    Returns:
        A one-element list, or an empty list when no name can be determined.
    """
    declarator = getattr(node, "parent", None)
    if not declarator or declarator.type != "variable_declarator":
        return []

    function_name = self._get_node_name(declarator)
    if not function_name:
        return []

    start_line = node.start_point[0] + 1
    end_line = node.end_point[0] + 1

    chunk_fields = {
        "content": self._get_line_range(lines, start_line, end_line),
        "file_path": file_path,
        "start_line": start_line,
        "end_line": end_line,
        "chunk_type": "function",
        "function_name": function_name,
        "class_name": class_name,
        "docstring": self._extract_jsdoc_from_node(node, lines),
        "complexity_score": self._calculate_complexity(node, "javascript"),
        "parameters": self._extract_js_parameters(node),
        "chunk_depth": 2 if class_name else 1,
    }
    return [self._create_chunk(**chunk_fields)]
|
|
209
|
+
|
|
210
|
+
def _extract_variable_function(
    self, node, lines: list[str], file_path: Path, class_name: str | None = None
) -> list[CodeChunk]:
    """Extract functions assigned in a declaration (``const func = ...``).

    Each ``variable_declarator`` child whose value is a function becomes one
    ``"function"`` chunk spanning the declarator. Besides arrow functions
    this accepts ``function`` (older grammars) as well as
    ``function_expression`` and ``generator_function`` (current
    tree-sitter-javascript node names), so ``const f = function () {}`` is
    not skipped on newer grammars.

    Args:
        node: The declaration node whose children are inspected.
        lines: Source split into lines.
        file_path: Path recorded on the chunks.
        class_name: Enclosing class name, if any.

    Returns:
        One chunk per function-valued declarator; empty when there is none.
    """
    function_value_types = (
        "arrow_function",
        "function",
        "function_expression",
        "generator_function",
    )
    chunks = []

    for declarator in node.children:
        if declarator.type != "variable_declarator":
            continue

        for value in declarator.children:
            if value.type not in function_value_types:
                continue

            func_name = self._get_node_name(declarator)
            if not func_name:
                continue

            # The chunk covers the whole declarator (name and value), while
            # complexity and parameters are read from the function value.
            start_line = declarator.start_point[0] + 1
            end_line = declarator.end_point[0] + 1

            chunks.append(
                self._create_chunk(
                    content=self._get_line_range(lines, start_line, end_line),
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="function",
                    function_name=func_name,
                    class_name=class_name,
                    docstring=self._extract_jsdoc_from_node(declarator, lines),
                    complexity_score=self._calculate_complexity(
                        value, "javascript"
                    ),
                    parameters=self._extract_js_parameters(value),
                    chunk_depth=2 if class_name else 1,
                )
            )

    return chunks
|
|
253
|
+
|
|
254
|
+
def _extract_class(
    self, node, lines: list[str], file_path: Path
) -> list[CodeChunk]:
    """Build a ``"class"`` chunk for a class declaration node.

    Args:
        node: The class declaration node.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.

    Returns:
        A one-element list, or an empty list when the class has no name.
    """
    class_name = self._get_node_name(node)
    if class_name:
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        class_text = self._get_line_range(lines, start_line, end_line)
        jsdoc = self._extract_jsdoc_from_node(node, lines)
        score = self._calculate_complexity(node, "javascript")

        return [
            self._create_chunk(
                content=class_text,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                chunk_type="class",
                class_name=class_name,
                docstring=jsdoc,
                complexity_score=score,
                chunk_depth=1,
            )
        ]
    return []
|
|
283
|
+
|
|
284
|
+
def _extract_method(
    self, node, lines: list[str], file_path: Path, class_name: str | None = None
) -> list[CodeChunk]:
    """Build a ``"method"`` chunk for a method definition inside a class.

    Args:
        node: The method definition node.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.
        class_name: Name of the enclosing class, if known.

    Returns:
        A one-element list, or an empty list when the method has no name.
    """
    method_name = self._get_node_name(node)
    if not method_name:
        return []

    start_line = node.start_point[0] + 1
    end_line = node.end_point[0] + 1

    chunk_fields = {
        "content": self._get_line_range(lines, start_line, end_line),
        "file_path": file_path,
        "start_line": start_line,
        "end_line": end_line,
        "chunk_type": "method",
        "function_name": method_name,
        "class_name": class_name,
        "docstring": self._extract_jsdoc_from_node(node, lines),
        "complexity_score": self._calculate_complexity(node, "javascript"),
        "parameters": self._extract_js_parameters(node),
        # Decorators attached directly to the method node (TypeScript).
        "decorators": self._extract_decorators_from_node(node),
        "chunk_depth": 2,
    }
    return [self._create_chunk(**chunk_fields)]
|
|
322
|
+
|
|
323
|
+
def _get_node_name(self, node) -> str | None:
|
|
324
|
+
"""Extract name from a named node."""
|
|
325
|
+
for child in node.children:
|
|
326
|
+
if child.type in ("identifier", "property_identifier"):
|
|
327
|
+
return child.text.decode("utf-8")
|
|
328
|
+
return None
|
|
329
|
+
|
|
330
|
+
def _get_node_text(self, node) -> str:
|
|
331
|
+
"""Get text content of a node."""
|
|
332
|
+
if hasattr(node, "text"):
|
|
333
|
+
return node.text.decode("utf-8")
|
|
334
|
+
return ""
|
|
335
|
+
|
|
336
|
+
def _extract_js_parameters(self, node) -> list[dict]:
|
|
337
|
+
"""Extract function parameters from JavaScript/TypeScript AST."""
|
|
338
|
+
parameters = []
|
|
339
|
+
|
|
340
|
+
for child in node.children:
|
|
341
|
+
if child.type == "formal_parameters":
|
|
342
|
+
for param_node in child.children:
|
|
343
|
+
if param_node.type in (
|
|
344
|
+
"identifier",
|
|
345
|
+
"required_parameter",
|
|
346
|
+
"optional_parameter",
|
|
347
|
+
"rest_parameter",
|
|
348
|
+
):
|
|
349
|
+
param_info = {"name": None, "type": None, "default": None}
|
|
350
|
+
|
|
351
|
+
# Extract parameter details
|
|
352
|
+
if param_node.type == "identifier":
|
|
353
|
+
param_info["name"] = self._get_node_text(param_node)
|
|
354
|
+
else:
|
|
355
|
+
# TypeScript typed parameters
|
|
356
|
+
for subchild in param_node.children:
|
|
357
|
+
if subchild.type == "identifier":
|
|
358
|
+
param_info["name"] = self._get_node_text(subchild)
|
|
359
|
+
elif subchild.type == "type_annotation":
|
|
360
|
+
param_info["type"] = self._get_node_text(subchild)
|
|
361
|
+
elif (
|
|
362
|
+
"default" in subchild.type
|
|
363
|
+
or subchild.type == "number"
|
|
364
|
+
):
|
|
365
|
+
param_info["default"] = self._get_node_text(
|
|
366
|
+
subchild
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
if param_info["name"] and param_info["name"] not in (
|
|
370
|
+
"(",
|
|
371
|
+
")",
|
|
372
|
+
",",
|
|
373
|
+
"...",
|
|
374
|
+
):
|
|
375
|
+
# Clean up rest parameters
|
|
376
|
+
if param_info["name"].startswith("..."):
|
|
377
|
+
param_info["name"] = param_info["name"][3:]
|
|
378
|
+
param_info["rest"] = True
|
|
379
|
+
parameters.append(param_info)
|
|
380
|
+
|
|
381
|
+
return parameters
|
|
382
|
+
|
|
383
|
+
def _extract_decorators_from_node(self, node) -> list[str]:
|
|
384
|
+
"""Extract decorators from TypeScript node."""
|
|
385
|
+
decorators = []
|
|
386
|
+
|
|
387
|
+
for child in node.children:
|
|
388
|
+
if child.type == "decorator":
|
|
389
|
+
decorators.append(self._get_node_text(child))
|
|
390
|
+
|
|
391
|
+
return decorators
|
|
392
|
+
|
|
393
|
+
def _extract_jsdoc_from_node(self, node, lines: list[str]) -> str | None:
|
|
394
|
+
"""Extract JSDoc comment from before a node."""
|
|
395
|
+
start_line = node.start_point[0]
|
|
396
|
+
return self._extract_jsdoc(lines, start_line + 1)
|
|
397
|
+
|
|
398
|
+
async def _regex_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Parse JavaScript/TypeScript using regex patterns.

    Fallback used when Tree-sitter is unavailable or failed. Functions,
    classes and (for TypeScript) interfaces are located with line-anchored
    patterns, and each block's end is found by brace matching.

    Two corrections over a naive pattern scan:

    * Statement keywords such as ``if``/``for``/``while`` also fit the bare
      ``name(...) {`` method pattern; they are not reported as functions.
    * ``^\\s*`` may begin matching on blank lines above a declaration, so
      the start line is computed from the first non-blank character of the
      match rather than from the match start.

    Args:
        content: Full source text.
        file_path: Path recorded on the produced chunks.

    Returns:
        The extracted chunks, or a single ``"module"`` chunk spanning the
        whole file when nothing more specific was found.
    """
    chunks = []
    lines = self._split_into_lines(content)

    # JavaScript/TypeScript patterns
    function_patterns = [
        re.compile(r"^\s*function\s+(\w+)\s*\(", re.MULTILINE),  # function name()
        re.compile(
            r"^\s*const\s+(\w+)\s*=\s*\([^)]*\)\s*=>\s*{", re.MULTILINE
        ),  # const name = () => {
        re.compile(
            r"^\s*const\s+(\w+)\s*=\s*function\s*\(", re.MULTILINE
        ),  # const name = function(
        re.compile(
            r"^\s*(\w+)\s*:\s*function\s*\(", re.MULTILINE
        ),  # name: function(
        re.compile(r"^\s*(\w+)\s*\([^)]*\)\s*{", re.MULTILINE),  # name() { (method)
        re.compile(
            r"^\s*async\s+function\s+(\w+)\s*\(", re.MULTILINE
        ),  # async function name()
        re.compile(
            r"^\s*async\s+(\w+)\s*\([^)]*\)\s*{", re.MULTILINE
        ),  # async name() {
    ]

    class_patterns = [
        re.compile(r"^\s*class\s+(\w+)", re.MULTILINE),  # class Name
        re.compile(
            r"^\s*export\s+class\s+(\w+)", re.MULTILINE
        ),  # export class Name
        re.compile(
            r"^\s*export\s+default\s+class\s+(\w+)", re.MULTILINE
        ),  # export default class Name
    ]

    interface_patterns = [
        re.compile(
            r"^\s*interface\s+(\w+)", re.MULTILINE
        ),  # interface Name (TypeScript)
        re.compile(
            r"^\s*export\s+interface\s+(\w+)", re.MULTILINE
        ),  # export interface Name
    ]

    import_pattern = re.compile(r"^\s*(import|export).*", re.MULTILINE)

    # Keywords that can precede "(...) {" without naming a function.
    non_function_names = frozenset(
        {"if", "for", "while", "switch", "catch", "with", "function"}
    )

    # Every import/export line is attached to each produced chunk.
    imports = [match.group(0).strip() for match in import_pattern.finditer(content)]

    def collect(patterns, chunk_type, name_field, skip_names=frozenset()):
        """Append one chunk per pattern match that yields a non-blank block."""
        for pattern in patterns:
            for match in pattern.finditer(content):
                name = match.group(1)
                if name in skip_names:
                    continue

                # Skip the whitespace (possibly whole blank lines) that the
                # leading "^\s*" consumed before the declaration itself.
                matched_text = match.group(0)
                declaration_offset = (
                    match.start() + len(matched_text) - len(matched_text.lstrip())
                )
                start_line = content.count("\n", 0, declaration_offset) + 1
                end_line = self._find_block_end(lines, start_line, "{", "}")

                block_text = self._get_line_range(lines, start_line, end_line)
                if not block_text.strip():
                    continue

                chunk = self._create_chunk(
                    content=block_text,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type=chunk_type,
                    docstring=self._extract_jsdoc(lines, start_line),
                    **{name_field: name},
                )
                chunk.imports = imports
                chunks.append(chunk)

    collect(function_patterns, "function", "function_name", non_function_names)
    collect(class_patterns, "class", "class_name")

    # Extract interfaces (TypeScript)
    if self.language == "typescript":
        # The class_name field doubles as the interface name.
        collect(interface_patterns, "interface", "class_name")

    # If no specific chunks found, create a single chunk for the whole file
    if not chunks:
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
|
|
548
|
+
|
|
549
|
+
def _find_block_end(
|
|
550
|
+
self, lines: list[str], start_line: int, open_char: str, close_char: str
|
|
551
|
+
) -> int:
|
|
552
|
+
"""Find the end of a block by matching braces."""
|
|
553
|
+
if start_line > len(lines):
|
|
554
|
+
return len(lines)
|
|
555
|
+
|
|
556
|
+
brace_count = 0
|
|
557
|
+
found_opening = False
|
|
558
|
+
|
|
559
|
+
for i in range(start_line - 1, len(lines)):
|
|
560
|
+
line = lines[i]
|
|
561
|
+
|
|
562
|
+
for char in line:
|
|
563
|
+
if char == open_char:
|
|
564
|
+
brace_count += 1
|
|
565
|
+
found_opening = True
|
|
566
|
+
elif char == close_char:
|
|
567
|
+
brace_count -= 1
|
|
568
|
+
|
|
569
|
+
if found_opening and brace_count == 0:
|
|
570
|
+
return i + 1 # Return 1-based line number
|
|
571
|
+
|
|
572
|
+
return len(lines)
|
|
573
|
+
|
|
574
|
+
def _extract_jsdoc(self, lines: list[str], start_line: int) -> str | None:
|
|
575
|
+
"""Extract JSDoc comment before a function/class."""
|
|
576
|
+
if start_line <= 1:
|
|
577
|
+
return None
|
|
578
|
+
|
|
579
|
+
# Look backwards for JSDoc comment
|
|
580
|
+
for i in range(start_line - 2, max(-1, start_line - 10), -1):
|
|
581
|
+
line = lines[i].strip()
|
|
582
|
+
|
|
583
|
+
if line.endswith("*/"):
|
|
584
|
+
# Found end of JSDoc, collect the comment
|
|
585
|
+
jsdoc_lines = []
|
|
586
|
+
for j in range(i, -1, -1):
|
|
587
|
+
comment_line = lines[j].strip()
|
|
588
|
+
jsdoc_lines.insert(0, comment_line)
|
|
589
|
+
|
|
590
|
+
if comment_line.startswith("/**"):
|
|
591
|
+
# Found start of JSDoc
|
|
592
|
+
# Clean up the comment
|
|
593
|
+
cleaned_lines = []
|
|
594
|
+
for line in jsdoc_lines:
|
|
595
|
+
# Remove /** */ and * prefixes
|
|
596
|
+
cleaned = (
|
|
597
|
+
line.replace("/**", "")
|
|
598
|
+
.replace("*/", "")
|
|
599
|
+
.replace("*", "")
|
|
600
|
+
.strip()
|
|
601
|
+
)
|
|
602
|
+
if cleaned:
|
|
603
|
+
cleaned_lines.append(cleaned)
|
|
604
|
+
|
|
605
|
+
return " ".join(cleaned_lines) if cleaned_lines else None
|
|
606
|
+
|
|
607
|
+
# If we hit non-comment code, stop looking
|
|
608
|
+
elif line and not line.startswith("//") and not line.startswith("*"):
|
|
609
|
+
break
|
|
610
|
+
|
|
611
|
+
return None
|
|
612
|
+
|
|
613
|
+
def get_supported_extensions(self) -> list[str]:
    """Return the file extensions handled for the configured language.

    Returns:
        ``[".ts", ".tsx"]`` when ``self.language`` is ``"typescript"``,
        otherwise ``[".js", ".jsx", ".mjs"]``.
    """
    is_typescript = self.language == "typescript"
    return [".ts", ".tsx"] if is_typescript else [".js", ".jsx", ".mjs"]
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
class TypeScriptParser(JavaScriptParser):
    """Parser for TypeScript sources, built on top of the JavaScript parser."""

    def __init__(self) -> None:
        """Create the parser with its language fixed to ``"typescript"``."""
        super().__init__("typescript")

    def _initialize_parser(self) -> None:
        """Load the TypeScript grammar and parser from tree-sitter-language-pack.

        Any failure is logged at debug level and leaves the parser in
        regex-fallback mode.
        """
        grammar = "typescript"
        try:
            # Imported lazily so the module still loads without the package.
            from tree_sitter_language_pack import get_language, get_parser

            self._language = get_language(grammar)
            self._parser = get_parser(grammar)
            logger.debug(
                "TypeScript Tree-sitter parser initialized via tree-sitter-language-pack"
            )
        except Exception as exc:
            logger.debug(f"tree-sitter-language-pack failed: {exc}, using regex fallback")
            self._use_tree_sitter = False
        else:
            self._use_tree_sitter = True
|