mcp-vector-search 0.7.5__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

@@ -281,24 +281,27 @@ class ProjectManager:
281
281
  continue
282
282
 
283
283
  # Skip ignored patterns
284
- if self._should_ignore_path(path):
284
+ # PERFORMANCE: Pass is_directory=False since we already checked is_file()
285
+ if self._should_ignore_path(path, is_directory=False):
285
286
  continue
286
287
 
287
288
  files.append(path)
288
289
 
289
290
  return files
290
291
 
291
- def _should_ignore_path(self, path: Path) -> bool:
292
+ def _should_ignore_path(self, path: Path, is_directory: bool | None = None) -> bool:
292
293
  """Check if a path should be ignored.
293
294
 
294
295
  Args:
295
296
  path: Path to check
297
+ is_directory: Optional hint if path is a directory (avoids filesystem check)
296
298
 
297
299
  Returns:
298
300
  True if path should be ignored
299
301
  """
300
302
  # First check gitignore rules if available
301
- if self.gitignore_parser and self.gitignore_parser.is_ignored(path):
303
+ # PERFORMANCE: Pass is_directory hint to avoid redundant stat() calls
304
+ if self.gitignore_parser and self.gitignore_parser.is_ignored(path, is_directory=is_directory):
302
305
  return True
303
306
 
304
307
  # Check if any parent directory is in ignore patterns
@@ -64,6 +64,68 @@ class BaseParser(ABC):
64
64
  """
65
65
  ...
66
66
 
67
+ def _calculate_complexity(self, node, language: str | None = None) -> float:
68
+ """Calculate cyclomatic complexity from AST node.
69
+
70
+ Cyclomatic complexity = Number of decision points + 1
71
+
72
+ Args:
73
+ node: AST node (tree-sitter)
74
+ language: Programming language for language-specific patterns (defaults to self.language)
75
+
76
+ Returns:
77
+ Complexity score (1.0 = simple, 10+ = complex)
78
+ """
79
+ if language is None:
80
+ language = self.language
81
+
82
+ if not hasattr(node, 'children'):
83
+ return 1.0
84
+
85
+ complexity = 1.0 # Base complexity
86
+
87
+ # Language-specific decision node types
88
+ decision_nodes = {
89
+ "python": {
90
+ "if_statement", "elif_clause", "while_statement", "for_statement",
91
+ "except_clause", "with_statement", "conditional_expression",
92
+ "boolean_operator" # and, or
93
+ },
94
+ "javascript": {
95
+ "if_statement", "while_statement", "for_statement", "for_in_statement",
96
+ "switch_case", "catch_clause", "conditional_expression", "ternary_expression"
97
+ },
98
+ "typescript": {
99
+ "if_statement", "while_statement", "for_statement", "for_in_statement",
100
+ "switch_case", "catch_clause", "conditional_expression", "ternary_expression"
101
+ },
102
+ "dart": {
103
+ "if_statement", "while_statement", "for_statement", "for_in_statement",
104
+ "switch_case", "catch_clause", "conditional_expression"
105
+ },
106
+ "php": {
107
+ "if_statement", "elseif_clause", "while_statement", "foreach_statement",
108
+ "for_statement", "switch_case", "catch_clause", "ternary_expression"
109
+ },
110
+ "ruby": {
111
+ "if", "unless", "while", "until", "for", "case", "rescue",
112
+ "conditional"
113
+ }
114
+ }
115
+
116
+ nodes_to_count = decision_nodes.get(language, decision_nodes.get("python", set()))
117
+
118
+ def count_decision_points(n):
119
+ nonlocal complexity
120
+ if hasattr(n, 'type') and n.type in nodes_to_count:
121
+ complexity += 1
122
+ if hasattr(n, 'children'):
123
+ for child in n.children:
124
+ count_decision_points(child)
125
+
126
+ count_decision_points(node)
127
+ return complexity
128
+
67
129
  def _create_chunk(
68
130
  self,
69
131
  content: str,
@@ -74,6 +136,13 @@ class BaseParser(ABC):
74
136
  function_name: str | None = None,
75
137
  class_name: str | None = None,
76
138
  docstring: str | None = None,
139
+ complexity_score: float = 0.0,
140
+ decorators: list[str] | None = None,
141
+ parameters: list[dict] | None = None,
142
+ return_type: str | None = None,
143
+ chunk_id: str | None = None,
144
+ parent_chunk_id: str | None = None,
145
+ chunk_depth: int = 0,
77
146
  ) -> CodeChunk:
78
147
  """Create a code chunk with metadata.
79
148
 
@@ -86,6 +155,13 @@ class BaseParser(ABC):
86
155
  function_name: Function name if applicable
87
156
  class_name: Class name if applicable
88
157
  docstring: Docstring if applicable
158
+ complexity_score: Cyclomatic complexity score
159
+ decorators: List of decorators/annotations
160
+ parameters: List of function parameters with metadata
161
+ return_type: Return type annotation
162
+ chunk_id: Unique chunk identifier
163
+ parent_chunk_id: Parent chunk ID for hierarchical relationships
164
+ chunk_depth: Nesting level in code hierarchy
89
165
 
90
166
  Returns:
91
167
  CodeChunk instance
@@ -100,6 +176,13 @@ class BaseParser(ABC):
100
176
  function_name=function_name,
101
177
  class_name=class_name,
102
178
  docstring=docstring,
179
+ complexity_score=complexity_score,
180
+ decorators=decorators or [],
181
+ parameters=parameters or [],
182
+ return_type=return_type,
183
+ chunk_id=chunk_id,
184
+ parent_chunk_id=parent_chunk_id,
185
+ chunk_depth=chunk_depth,
103
186
  )
104
187
 
105
188
  def _split_into_lines(self, content: str) -> list[str]:
@@ -10,11 +10,32 @@ from .base import BaseParser
10
10
 
11
11
 
12
12
  class JavaScriptParser(BaseParser):
13
- """JavaScript/TypeScript parser with fallback regex-based parsing."""
13
+ """JavaScript parser with tree-sitter AST support and fallback regex parsing."""
14
14
 
15
15
def __init__(self, language: str = "javascript") -> None:
    """Set up the parser state and try to enable tree-sitter parsing.

    Args:
        language: Language name forwarded to the base parser
    """
    super().__init__(language)
    # Start in regex-fallback mode; _initialize_parser() switches to
    # tree-sitter when the grammar can be loaded.
    self._parser = self._language = None
    self._use_tree_sitter = False
    self._initialize_parser()
22
+
23
def _initialize_parser(self) -> None:
    """Load the JavaScript tree-sitter grammar, falling back to regex parsing.

    On success ``self._language`` and ``self._parser`` are populated and
    ``self._use_tree_sitter`` is set to True. Any failure (including a
    missing ``tree_sitter_language_pack`` package) is logged at debug
    level and leaves the parser in regex-fallback mode.
    """
    grammar = "javascript"
    try:
        from tree_sitter_language_pack import get_language, get_parser

        self._language = get_language(grammar)
        self._parser = get_parser(grammar)
        logger.debug(
            "JavaScript Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        self._use_tree_sitter = True
    except Exception as e:
        logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")
        self._use_tree_sitter = False
18
39
 
19
40
  async def parse_file(self, file_path: Path) -> list[CodeChunk]:
20
41
  """Parse a JavaScript/TypeScript file and extract code chunks."""
@@ -31,7 +52,317 @@ class JavaScriptParser(BaseParser):
31
52
  if not content.strip():
32
53
  return []
33
54
 
34
- return await self._regex_parse(content, file_path)
55
+ if self._use_tree_sitter:
56
+ try:
57
+ tree = self._parser.parse(content.encode('utf-8'))
58
+ return self._extract_chunks_from_tree(tree, content, file_path)
59
+ except Exception as e:
60
+ logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")
61
+ return await self._regex_parse(content, file_path)
62
+ else:
63
+ return await self._regex_parse(content, file_path)
64
+
65
+ def _extract_chunks_from_tree(
66
+ self, tree, content: str, file_path: Path
67
+ ) -> list[CodeChunk]:
68
+ """Extract code chunks from JavaScript AST."""
69
+ chunks = []
70
+ lines = self._split_into_lines(content)
71
+
72
+ def visit_node(node, current_class=None):
73
+ """Recursively visit AST nodes."""
74
+ node_type = node.type
75
+
76
+ if node_type == "function_declaration":
77
+ chunks.extend(self._extract_function(node, lines, file_path, current_class))
78
+ elif node_type == "arrow_function":
79
+ chunks.extend(self._extract_arrow_function(node, lines, file_path, current_class))
80
+ elif node_type == "class_declaration":
81
+ class_chunks = self._extract_class(node, lines, file_path)
82
+ chunks.extend(class_chunks)
83
+
84
+ # Visit class methods
85
+ class_name = self._get_node_name(node)
86
+ for child in node.children:
87
+ visit_node(child, class_name)
88
+ elif node_type == "method_definition":
89
+ chunks.extend(self._extract_method(node, lines, file_path, current_class))
90
+ elif node_type == "lexical_declaration":
91
+ # const/let declarations might be arrow functions
92
+ chunks.extend(self._extract_variable_function(node, lines, file_path, current_class))
93
+
94
+ # Recurse into children
95
+ if hasattr(node, 'children'):
96
+ for child in node.children:
97
+ if child.type not in ("class_declaration", "function_declaration"):
98
+ visit_node(child, current_class)
99
+
100
+ visit_node(tree.root_node)
101
+
102
+ # If no specific chunks found, create a single chunk for the whole file
103
+ if not chunks:
104
+ chunks.append(
105
+ self._create_chunk(
106
+ content=content,
107
+ file_path=file_path,
108
+ start_line=1,
109
+ end_line=len(lines),
110
+ chunk_type="module",
111
+ )
112
+ )
113
+
114
+ return chunks
115
+
116
+ def _extract_function(
117
+ self, node, lines: list[str], file_path: Path, class_name: str | None = None
118
+ ) -> list[CodeChunk]:
119
+ """Extract function declaration from AST."""
120
+ function_name = self._get_node_name(node)
121
+ if not function_name:
122
+ return []
123
+
124
+ start_line = node.start_point[0] + 1
125
+ end_line = node.end_point[0] + 1
126
+
127
+ content = self._get_line_range(lines, start_line, end_line)
128
+ docstring = self._extract_jsdoc_from_node(node, lines)
129
+
130
+ # Calculate complexity
131
+ complexity = self._calculate_complexity(node, "javascript")
132
+
133
+ # Extract parameters
134
+ parameters = self._extract_js_parameters(node)
135
+
136
+ chunk = self._create_chunk(
137
+ content=content,
138
+ file_path=file_path,
139
+ start_line=start_line,
140
+ end_line=end_line,
141
+ chunk_type="function",
142
+ function_name=function_name,
143
+ class_name=class_name,
144
+ docstring=docstring,
145
+ complexity_score=complexity,
146
+ parameters=parameters,
147
+ chunk_depth=2 if class_name else 1,
148
+ )
149
+ return [chunk]
150
+
151
+ def _extract_arrow_function(
152
+ self, node, lines: list[str], file_path: Path, class_name: str | None = None
153
+ ) -> list[CodeChunk]:
154
+ """Extract arrow function from AST."""
155
+ # Arrow functions often don't have explicit names, try to get from parent
156
+ parent = getattr(node, 'parent', None)
157
+ function_name = None
158
+
159
+ if parent and parent.type == "variable_declarator":
160
+ function_name = self._get_node_name(parent)
161
+
162
+ if not function_name:
163
+ return []
164
+
165
+ start_line = node.start_point[0] + 1
166
+ end_line = node.end_point[0] + 1
167
+
168
+ content = self._get_line_range(lines, start_line, end_line)
169
+ docstring = self._extract_jsdoc_from_node(node, lines)
170
+
171
+ # Calculate complexity
172
+ complexity = self._calculate_complexity(node, "javascript")
173
+
174
+ # Extract parameters
175
+ parameters = self._extract_js_parameters(node)
176
+
177
+ chunk = self._create_chunk(
178
+ content=content,
179
+ file_path=file_path,
180
+ start_line=start_line,
181
+ end_line=end_line,
182
+ chunk_type="function",
183
+ function_name=function_name,
184
+ class_name=class_name,
185
+ docstring=docstring,
186
+ complexity_score=complexity,
187
+ parameters=parameters,
188
+ chunk_depth=2 if class_name else 1,
189
+ )
190
+ return [chunk]
191
+
192
+ def _extract_variable_function(
193
+ self, node, lines: list[str], file_path: Path, class_name: str | None = None
194
+ ) -> list[CodeChunk]:
195
+ """Extract function from variable declaration (const func = ...)."""
196
+ chunks = []
197
+
198
+ for child in node.children:
199
+ if child.type == "variable_declarator":
200
+ # Check if it's a function assignment
201
+ for subchild in child.children:
202
+ if subchild.type in ("arrow_function", "function"):
203
+ func_name = self._get_node_name(child)
204
+ if func_name:
205
+ start_line = child.start_point[0] + 1
206
+ end_line = child.end_point[0] + 1
207
+
208
+ content = self._get_line_range(lines, start_line, end_line)
209
+ docstring = self._extract_jsdoc_from_node(child, lines)
210
+
211
+ # Calculate complexity
212
+ complexity = self._calculate_complexity(subchild, "javascript")
213
+
214
+ # Extract parameters
215
+ parameters = self._extract_js_parameters(subchild)
216
+
217
+ chunk = self._create_chunk(
218
+ content=content,
219
+ file_path=file_path,
220
+ start_line=start_line,
221
+ end_line=end_line,
222
+ chunk_type="function",
223
+ function_name=func_name,
224
+ class_name=class_name,
225
+ docstring=docstring,
226
+ complexity_score=complexity,
227
+ parameters=parameters,
228
+ chunk_depth=2 if class_name else 1,
229
+ )
230
+ chunks.append(chunk)
231
+
232
+ return chunks
233
+
234
+ def _extract_class(
235
+ self, node, lines: list[str], file_path: Path
236
+ ) -> list[CodeChunk]:
237
+ """Extract class declaration from AST."""
238
+ class_name = self._get_node_name(node)
239
+ if not class_name:
240
+ return []
241
+
242
+ start_line = node.start_point[0] + 1
243
+ end_line = node.end_point[0] + 1
244
+
245
+ content = self._get_line_range(lines, start_line, end_line)
246
+ docstring = self._extract_jsdoc_from_node(node, lines)
247
+
248
+ # Calculate complexity
249
+ complexity = self._calculate_complexity(node, "javascript")
250
+
251
+ chunk = self._create_chunk(
252
+ content=content,
253
+ file_path=file_path,
254
+ start_line=start_line,
255
+ end_line=end_line,
256
+ chunk_type="class",
257
+ class_name=class_name,
258
+ docstring=docstring,
259
+ complexity_score=complexity,
260
+ chunk_depth=1,
261
+ )
262
+ return [chunk]
263
+
264
+ def _extract_method(
265
+ self, node, lines: list[str], file_path: Path, class_name: str | None = None
266
+ ) -> list[CodeChunk]:
267
+ """Extract method definition from class."""
268
+ method_name = self._get_node_name(node)
269
+ if not method_name:
270
+ return []
271
+
272
+ start_line = node.start_point[0] + 1
273
+ end_line = node.end_point[0] + 1
274
+
275
+ content = self._get_line_range(lines, start_line, end_line)
276
+ docstring = self._extract_jsdoc_from_node(node, lines)
277
+
278
+ # Calculate complexity
279
+ complexity = self._calculate_complexity(node, "javascript")
280
+
281
+ # Extract parameters
282
+ parameters = self._extract_js_parameters(node)
283
+
284
+ # Check for decorators (TypeScript)
285
+ decorators = self._extract_decorators_from_node(node)
286
+
287
+ chunk = self._create_chunk(
288
+ content=content,
289
+ file_path=file_path,
290
+ start_line=start_line,
291
+ end_line=end_line,
292
+ chunk_type="method",
293
+ function_name=method_name,
294
+ class_name=class_name,
295
+ docstring=docstring,
296
+ complexity_score=complexity,
297
+ parameters=parameters,
298
+ decorators=decorators,
299
+ chunk_depth=2,
300
+ )
301
+ return [chunk]
302
+
303
+ def _get_node_name(self, node) -> str | None:
304
+ """Extract name from a named node."""
305
+ for child in node.children:
306
+ if child.type in ("identifier", "property_identifier"):
307
+ return child.text.decode("utf-8")
308
+ return None
309
+
310
+ def _get_node_text(self, node) -> str:
311
+ """Get text content of a node."""
312
+ if hasattr(node, 'text'):
313
+ return node.text.decode('utf-8')
314
+ return ""
315
+
316
+ def _extract_js_parameters(self, node) -> list[dict]:
317
+ """Extract function parameters from JavaScript/TypeScript AST."""
318
+ parameters = []
319
+
320
+ for child in node.children:
321
+ if child.type == "formal_parameters":
322
+ for param_node in child.children:
323
+ if param_node.type in ("identifier", "required_parameter", "optional_parameter", "rest_parameter"):
324
+ param_info = {
325
+ "name": None,
326
+ "type": None,
327
+ "default": None
328
+ }
329
+
330
+ # Extract parameter details
331
+ if param_node.type == "identifier":
332
+ param_info["name"] = self._get_node_text(param_node)
333
+ else:
334
+ # TypeScript typed parameters
335
+ for subchild in param_node.children:
336
+ if subchild.type == "identifier":
337
+ param_info["name"] = self._get_node_text(subchild)
338
+ elif subchild.type == "type_annotation":
339
+ param_info["type"] = self._get_node_text(subchild)
340
+ elif "default" in subchild.type or subchild.type == "number":
341
+ param_info["default"] = self._get_node_text(subchild)
342
+
343
+ if param_info["name"] and param_info["name"] not in ("(", ")", ",", "..."):
344
+ # Clean up rest parameters
345
+ if param_info["name"].startswith("..."):
346
+ param_info["name"] = param_info["name"][3:]
347
+ param_info["rest"] = True
348
+ parameters.append(param_info)
349
+
350
+ return parameters
351
+
352
+ def _extract_decorators_from_node(self, node) -> list[str]:
353
+ """Extract decorators from TypeScript node."""
354
+ decorators = []
355
+
356
+ for child in node.children:
357
+ if child.type == "decorator":
358
+ decorators.append(self._get_node_text(child))
359
+
360
+ return decorators
361
+
362
+ def _extract_jsdoc_from_node(self, node, lines: list[str]) -> str | None:
363
+ """Extract JSDoc comment from before a node."""
364
+ start_line = node.start_point[0]
365
+ return self._extract_jsdoc(lines, start_line + 1)
35
366
 
36
367
  async def _regex_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
37
368
  """Parse JavaScript/TypeScript using regex patterns."""
@@ -262,3 +593,20 @@ class TypeScriptParser(JavaScriptParser):
262
593
def __init__(self) -> None:
    """Create a parser configured for the "typescript" language."""
    super().__init__(language="typescript")
596
+
597
def _initialize_parser(self) -> None:
    """Load the TypeScript tree-sitter grammar, falling back to regex parsing.

    On success ``self._language`` and ``self._parser`` are populated and
    ``self._use_tree_sitter`` is set to True. Any failure (including a
    missing ``tree_sitter_language_pack`` package) is logged at debug
    level and leaves the parser in regex-fallback mode.
    """
    grammar = "typescript"
    try:
        from tree_sitter_language_pack import get_language, get_parser

        self._language = get_language(grammar)
        self._parser = get_parser(grammar)
        logger.debug(
            "TypeScript Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        self._use_tree_sitter = True
    except Exception as e:
        logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")
        self._use_tree_sitter = False
@@ -150,6 +150,18 @@ class PythonParser(BaseParser):
150
150
  # Extract docstring if present
151
151
  docstring = self._extract_docstring(node, lines)
152
152
 
153
+ # Enhancement 1: Calculate complexity
154
+ complexity = self._calculate_complexity(node, "python")
155
+
156
+ # Enhancement 4: Extract decorators
157
+ decorators = self._extract_decorators(node, lines)
158
+
159
+ # Enhancement 4: Extract parameters
160
+ parameters = self._extract_parameters(node)
161
+
162
+ # Enhancement 4: Extract return type
163
+ return_type = self._extract_return_type(node)
164
+
153
165
  chunk = self._create_chunk(
154
166
  content=content,
155
167
  file_path=file_path,
@@ -159,6 +171,11 @@ class PythonParser(BaseParser):
159
171
  function_name=function_name,
160
172
  class_name=class_name,
161
173
  docstring=docstring,
174
+ complexity_score=complexity,
175
+ decorators=decorators,
176
+ parameters=parameters,
177
+ return_type=return_type,
178
+ chunk_depth=2 if class_name else 1,
162
179
  )
163
180
  chunks.append(chunk)
164
181
 
@@ -180,6 +197,12 @@ class PythonParser(BaseParser):
180
197
  # Extract docstring if present
181
198
  docstring = self._extract_docstring(node, lines)
182
199
 
200
+ # Enhancement 1: Calculate complexity (for the entire class)
201
+ complexity = self._calculate_complexity(node, "python")
202
+
203
+ # Enhancement 4: Extract decorators
204
+ decorators = self._extract_decorators(node, lines)
205
+
183
206
  chunk = self._create_chunk(
184
207
  content=content,
185
208
  file_path=file_path,
@@ -188,6 +211,9 @@ class PythonParser(BaseParser):
188
211
  chunk_type="class",
189
212
  class_name=class_name,
190
213
  docstring=docstring,
214
+ complexity_score=complexity,
215
+ decorators=decorators,
216
+ chunk_depth=1,
191
217
  )
192
218
  chunks.append(chunk)
193
219
 
@@ -410,6 +436,59 @@ class PythonParser(BaseParser):
410
436
 
411
437
  return None
412
438
 
439
+ def _extract_decorators(self, node, lines: list[str]) -> list[str]:
440
+ """Extract decorator names from function/class node."""
441
+ decorators = []
442
+ for child in node.children:
443
+ if child.type == "decorator":
444
+ # Get decorator text (includes @ symbol)
445
+ dec_text = self._get_node_text(child).strip()
446
+ decorators.append(dec_text)
447
+ return decorators
448
+
449
+ def _extract_parameters(self, node) -> list[dict]:
450
+ """Extract function parameters with type annotations."""
451
+ parameters = []
452
+ for child in node.children:
453
+ if child.type == "parameters":
454
+ for param_node in child.children:
455
+ if param_node.type in ("identifier", "typed_parameter", "default_parameter"):
456
+ param_info = {
457
+ "name": None,
458
+ "type": None,
459
+ "default": None
460
+ }
461
+
462
+ # Extract parameter name
463
+ if param_node.type == "identifier":
464
+ param_info["name"] = self._get_node_text(param_node)
465
+ else:
466
+ # For typed or default parameters, find the identifier
467
+ for subchild in param_node.children:
468
+ if subchild.type == "identifier":
469
+ param_info["name"] = self._get_node_text(subchild)
470
+ elif subchild.type == "type":
471
+ param_info["type"] = self._get_node_text(subchild)
472
+ elif "default" in subchild.type:
473
+ param_info["default"] = self._get_node_text(subchild)
474
+
475
+ if param_info["name"] and param_info["name"] not in ("self", "cls", "(", ")", ","):
476
+ parameters.append(param_info)
477
+ return parameters
478
+
479
+ def _extract_return_type(self, node) -> str | None:
480
+ """Extract return type annotation from function."""
481
+ for child in node.children:
482
+ if child.type == "type":
483
+ return self._get_node_text(child)
484
+ return None
485
+
486
+ def _get_node_text(self, node) -> str:
487
+ """Get text content of a node."""
488
+ if hasattr(node, 'text'):
489
+ return node.text.decode('utf-8')
490
+ return ""
491
+
413
492
def get_supported_extensions(self) -> list[str]:
    """Return the file extensions handled by this parser.

    Returns:
        A new list with the Python source extensions
    """
    extensions = (".py", ".pyw")
    return list(extensions)