mcp-vector-search 0.15.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (86) hide show
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,502 @@
1
+ """Python parser using Tree-sitter for MCP Vector Search."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from loguru import logger
7
+
8
+ from ..core.models import CodeChunk
9
+ from .base import BaseParser
10
+
11
+
12
+ class PythonParser(BaseParser):
13
+ """Python parser using Tree-sitter for AST-based code analysis."""
14
+
15
def __init__(self) -> None:
    """Create a parser registered for the ``"python"`` language.

    Both Tree-sitter handles start out as ``None``; ``_initialize_parser``
    replaces them when a Tree-sitter backend can be loaded.
    """
    super().__init__("python")
    # Assigned left to right: parser handle first, then language handle.
    self._parser = self._language = None
    self._initialize_parser()
21
+
22
def _initialize_parser(self) -> None:
    """Load the Tree-sitter language and parser objects for Python.

    On success ``self._language`` and ``self._parser`` hold the objects
    returned by ``tree_sitter_language_pack``. If that package cannot be
    imported, or either lookup raises, both attributes are reset to
    ``None`` so that ``parse_content`` uses the regex-based fallback.
    """
    try:
        # Imported lazily so that a missing optional dependency is not fatal.
        from tree_sitter_language_pack import get_language, get_parser

        self._language = get_language("python")
        self._parser = get_parser("python")
    except Exception as e:
        # Deliberately broad: any failure here simply selects the fallback.
        logger.debug(f"tree-sitter-language-pack failed: {e}")
    else:
        logger.debug(
            "Python Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        return

    # A manual Tree-sitter setup would need language binaries and is not
    # implemented, so clear any half-initialized state and fall back.
    logger.debug("Manual tree-sitter setup not implemented yet")
    self._parser = None
    self._language = None

    logger.info(
        "Using fallback regex-based parsing for Python (Tree-sitter unavailable)"
    )
55
+
56
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
    """Read *file_path* as UTF-8 text and return the chunks parsed from it.

    Any exception raised while reading or parsing is logged and an empty
    list is returned instead.
    """
    try:
        with open(file_path, encoding="utf-8") as source_file:
            source_text = source_file.read()
        parsed = await self.parse_content(source_text, file_path)
    except Exception as e:
        logger.error(f"Failed to read file {file_path}: {e}")
        parsed = []
    return parsed
65
+
66
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Turn Python source text into code chunks.

    Whitespace-only content yields an empty list. When a Tree-sitter
    parser is available it is tried first; if it is missing or raises,
    the regex-based ``_fallback_parse`` produces the result instead.
    """
    if content.strip() == "":
        return []

    if self._parser:
        try:
            # Tree-sitter works on bytes, so encode the text first.
            tree = self._parser.parse(content.encode("utf-8"))
            return self._extract_chunks_from_tree(tree, content, file_path)
        except Exception as e:
            logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")

    return await self._fallback_parse(content, file_path)
82
+
83
+ def _extract_chunks_from_tree(
84
+ self, tree, content: str, file_path: Path
85
+ ) -> list[CodeChunk]:
86
+ """Extract code chunks from Tree-sitter AST."""
87
+ chunks = []
88
+ lines = self._split_into_lines(content)
89
+
90
+ def visit_node(node, current_class=None):
91
+ """Recursively visit AST nodes."""
92
+ node_type = node.type
93
+
94
+ if node_type == "function_definition":
95
+ chunks.extend(
96
+ self._extract_function(node, lines, file_path, current_class)
97
+ )
98
+ elif node_type == "class_definition":
99
+ class_chunks = self._extract_class(node, lines, file_path)
100
+ chunks.extend(class_chunks)
101
+
102
+ # Visit class methods with class context
103
+ class_name = self._get_node_name(node)
104
+ for child in node.children:
105
+ visit_node(child, class_name)
106
+ elif node_type == "module":
107
+ # Extract module-level code
108
+ module_chunk = self._extract_module_chunk(node, lines, file_path)
109
+ if module_chunk:
110
+ chunks.append(module_chunk)
111
+
112
+ # Visit all children
113
+ for child in node.children:
114
+ visit_node(child)
115
+ else:
116
+ # Visit children for other node types
117
+ for child in node.children:
118
+ visit_node(child, current_class)
119
+
120
+ # Start traversal from root
121
+ visit_node(tree.root_node)
122
+
123
+ # If no specific chunks found, create a single chunk for the whole file
124
+ if not chunks:
125
+ chunks.append(
126
+ self._create_chunk(
127
+ content=content,
128
+ file_path=file_path,
129
+ start_line=1,
130
+ end_line=len(lines),
131
+ chunk_type="module",
132
+ )
133
+ )
134
+
135
+ return chunks
136
+
137
+ def _extract_function(
138
+ self, node, lines: list[str], file_path: Path, class_name: str | None = None
139
+ ) -> list[CodeChunk]:
140
+ """Extract function definition as a chunk."""
141
+ chunks = []
142
+
143
+ function_name = self._get_node_name(node)
144
+ start_line = node.start_point[0] + 1
145
+ end_line = node.end_point[0] + 1
146
+
147
+ # Get function content
148
+ content = self._get_line_range(lines, start_line, end_line)
149
+
150
+ # Extract docstring if present
151
+ docstring = self._extract_docstring(node, lines)
152
+
153
+ # Enhancement 1: Calculate complexity
154
+ complexity = self._calculate_complexity(node, "python")
155
+
156
+ # Enhancement 4: Extract decorators
157
+ decorators = self._extract_decorators(node, lines)
158
+
159
+ # Enhancement 4: Extract parameters
160
+ parameters = self._extract_parameters(node)
161
+
162
+ # Enhancement 4: Extract return type
163
+ return_type = self._extract_return_type(node)
164
+
165
+ chunk = self._create_chunk(
166
+ content=content,
167
+ file_path=file_path,
168
+ start_line=start_line,
169
+ end_line=end_line,
170
+ chunk_type="function",
171
+ function_name=function_name,
172
+ class_name=class_name,
173
+ docstring=docstring,
174
+ complexity_score=complexity,
175
+ decorators=decorators,
176
+ parameters=parameters,
177
+ return_type=return_type,
178
+ chunk_depth=2 if class_name else 1,
179
+ )
180
+ chunks.append(chunk)
181
+
182
+ return chunks
183
+
184
+ def _extract_class(
185
+ self, node, lines: list[str], file_path: Path
186
+ ) -> list[CodeChunk]:
187
+ """Extract class definition as a chunk."""
188
+ chunks = []
189
+
190
+ class_name = self._get_node_name(node)
191
+ start_line = node.start_point[0] + 1
192
+ end_line = node.end_point[0] + 1
193
+
194
+ # Get class content
195
+ content = self._get_line_range(lines, start_line, end_line)
196
+
197
+ # Extract docstring if present
198
+ docstring = self._extract_docstring(node, lines)
199
+
200
+ # Enhancement 1: Calculate complexity (for the entire class)
201
+ complexity = self._calculate_complexity(node, "python")
202
+
203
+ # Enhancement 4: Extract decorators
204
+ decorators = self._extract_decorators(node, lines)
205
+
206
+ chunk = self._create_chunk(
207
+ content=content,
208
+ file_path=file_path,
209
+ start_line=start_line,
210
+ end_line=end_line,
211
+ chunk_type="class",
212
+ class_name=class_name,
213
+ docstring=docstring,
214
+ complexity_score=complexity,
215
+ decorators=decorators,
216
+ chunk_depth=1,
217
+ )
218
+ chunks.append(chunk)
219
+
220
+ return chunks
221
+
222
+ def _extract_module_chunk(
223
+ self, node, lines: list[str], file_path: Path
224
+ ) -> CodeChunk | None:
225
+ """Extract module-level code (imports, constants, etc.)."""
226
+ # Look for module-level statements (not inside functions/classes)
227
+ module_lines = []
228
+
229
+ for child in node.children:
230
+ if child.type in ["import_statement", "import_from_statement"]:
231
+ start_line = child.start_point[0] + 1
232
+ end_line = child.end_point[0] + 1
233
+ import_content = self._get_line_range(lines, start_line, end_line)
234
+ module_lines.append(import_content.strip())
235
+
236
+ if module_lines:
237
+ content = "\n".join(module_lines)
238
+ return self._create_chunk(
239
+ content=content,
240
+ file_path=file_path,
241
+ start_line=1,
242
+ end_line=len(module_lines),
243
+ chunk_type="imports",
244
+ )
245
+
246
+ return None
247
+
248
+ def _get_node_name(self, node) -> str | None:
249
+ """Extract name from a named node (function, class, etc.)."""
250
+ for child in node.children:
251
+ if child.type == "identifier":
252
+ return child.text.decode("utf-8")
253
+ return None
254
+
255
+ def _extract_docstring(self, node, lines: list[str]) -> str | None:
256
+ """Extract docstring from a function or class node."""
257
+ # Look for string literal as first statement in body
258
+ for child in node.children:
259
+ if child.type == "block":
260
+ for stmt in child.children:
261
+ if stmt.type == "expression_statement":
262
+ for expr_child in stmt.children:
263
+ if expr_child.type == "string":
264
+ # Extract string content
265
+ start_line = expr_child.start_point[0] + 1
266
+ end_line = expr_child.end_point[0] + 1
267
+ docstring = self._get_line_range(
268
+ lines, start_line, end_line
269
+ )
270
+ # Clean up docstring (remove quotes)
271
+ return self._clean_docstring(docstring)
272
+ return None
273
+
274
+ def _clean_docstring(self, docstring: str) -> str:
275
+ """Clean up extracted docstring."""
276
+ # Remove triple quotes and clean whitespace
277
+ cleaned = re.sub(r'^["\']{{3}}|["\']{{3}}$', "", docstring.strip())
278
+ cleaned = re.sub(r'^["\']|["\']$', "", cleaned.strip())
279
+ return cleaned.strip()
280
+
281
+ async def _fallback_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
282
+ """Fallback parsing using regex when Tree-sitter is not available."""
283
+ chunks = []
284
+ lines = self._split_into_lines(content)
285
+
286
+ # Enhanced regex patterns
287
+ function_pattern = re.compile(r"^\s*def\s+(\w+)\s*\(", re.MULTILINE)
288
+ class_pattern = re.compile(r"^\s*class\s+(\w+)\s*[:\(]", re.MULTILINE)
289
+ import_pattern = re.compile(r"^\s*(from\s+\S+\s+)?import\s+(.+)", re.MULTILINE)
290
+
291
+ # Extract imports first
292
+ imports = []
293
+ for match in import_pattern.finditer(content):
294
+ import_line = match.group(0).strip()
295
+ imports.append(import_line)
296
+
297
+ # Find functions
298
+ for match in function_pattern.finditer(content):
299
+ function_name = match.group(1)
300
+ # Find the actual line with 'def' by looking for it in the match
301
+ match_text = match.group(0)
302
+ def_pos_in_match = match_text.find("def")
303
+ actual_def_pos = match.start() + def_pos_in_match
304
+ start_line = content[:actual_def_pos].count("\n") + 1
305
+
306
+ # Find end of function (simple heuristic)
307
+ end_line = self._find_function_end(lines, start_line)
308
+
309
+ func_content = self._get_line_range(lines, start_line, end_line)
310
+
311
+ if func_content.strip(): # Only add if content is not empty
312
+ # Extract docstring using regex
313
+ docstring = self._extract_docstring_regex(func_content)
314
+
315
+ chunk = self._create_chunk(
316
+ content=func_content,
317
+ file_path=file_path,
318
+ start_line=start_line,
319
+ end_line=end_line,
320
+ chunk_type="function",
321
+ function_name=function_name,
322
+ docstring=docstring,
323
+ )
324
+ chunk.imports = imports # Add imports to chunk
325
+ chunks.append(chunk)
326
+
327
+ # Find classes
328
+ for match in class_pattern.finditer(content):
329
+ class_name = match.group(1)
330
+ # Find the actual line with 'class' by looking for it in the match
331
+ match_text = match.group(0)
332
+ class_pos_in_match = match_text.find("class")
333
+ actual_class_pos = match.start() + class_pos_in_match
334
+ start_line = content[:actual_class_pos].count("\n") + 1
335
+
336
+ # Find end of class (simple heuristic)
337
+ end_line = self._find_class_end(lines, start_line)
338
+
339
+ class_content = self._get_line_range(lines, start_line, end_line)
340
+
341
+ if class_content.strip(): # Only add if content is not empty
342
+ # Extract class docstring
343
+ docstring = self._extract_docstring_regex(class_content)
344
+
345
+ chunk = self._create_chunk(
346
+ content=class_content,
347
+ file_path=file_path,
348
+ start_line=start_line,
349
+ end_line=end_line,
350
+ chunk_type="class",
351
+ class_name=class_name,
352
+ docstring=docstring,
353
+ )
354
+ chunk.imports = imports # Add imports to chunk
355
+ chunks.append(chunk)
356
+
357
+ # If no functions or classes found, create chunks for the whole file
358
+ if not chunks:
359
+ chunks.append(
360
+ self._create_chunk(
361
+ content=content,
362
+ file_path=file_path,
363
+ start_line=1,
364
+ end_line=len(lines),
365
+ chunk_type="module",
366
+ )
367
+ )
368
+
369
+ return chunks
370
+
371
+ def _find_function_end(self, lines: list[str], start_line: int) -> int:
372
+ """Find the end line of a function using indentation."""
373
+ if start_line > len(lines):
374
+ return len(lines)
375
+
376
+ # Get initial indentation of the def line
377
+ start_idx = start_line - 1
378
+ if start_idx >= len(lines):
379
+ return len(lines)
380
+
381
+ def_line = lines[start_idx]
382
+ def_indent = len(def_line) - len(def_line.lstrip())
383
+
384
+ # Find end by looking for line with indentation <= def indentation
385
+ # Start from the line after the def line
386
+ for i in range(start_idx + 1, len(lines)):
387
+ line = lines[i]
388
+ if line.strip(): # Skip empty lines
389
+ current_indent = len(line) - len(line.lstrip())
390
+ if current_indent <= def_indent:
391
+ return i # Return 1-based line number (i is 0-based index)
392
+
393
+ # If we reach here, the function goes to the end of the file
394
+ return len(lines)
395
+
396
+ def _find_class_end(self, lines: list[str], start_line: int) -> int:
397
+ """Find the end line of a class using indentation."""
398
+ return self._find_function_end(lines, start_line)
399
+
400
+ def _extract_docstring_regex(self, content: str) -> str | None:
401
+ """Extract docstring using regex patterns."""
402
+ # Look for triple-quoted strings at the beginning of the content
403
+ # after the def/class line
404
+ lines = content.splitlines()
405
+ if len(lines) < 2:
406
+ return None
407
+
408
+ # Skip the def/class line and look for docstring in subsequent lines
409
+ for i in range(1, min(len(lines), 5)): # Check first few lines
410
+ line = lines[i].strip()
411
+ if not line:
412
+ continue
413
+
414
+ # Check for triple-quoted docstrings
415
+ if line.startswith('"""') or line.startswith("'''"):
416
+ quote_type = line[:3]
417
+
418
+ # Single-line docstring
419
+ if line.endswith(quote_type) and len(line) > 6:
420
+ return line[3:-3].strip()
421
+
422
+ # Multi-line docstring
423
+ docstring_lines = [line[3:]]
424
+ for j in range(i + 1, len(lines)):
425
+ next_line = lines[j].strip()
426
+ if next_line.endswith(quote_type):
427
+ docstring_lines.append(next_line[:-3])
428
+ break
429
+ docstring_lines.append(next_line)
430
+
431
+ return " ".join(docstring_lines).strip()
432
+
433
+ # If we hit non-docstring code, stop looking
434
+ if line and not line.startswith("#"):
435
+ break
436
+
437
+ return None
438
+
439
+ def _extract_decorators(self, node, lines: list[str]) -> list[str]:
440
+ """Extract decorator names from function/class node."""
441
+ decorators = []
442
+ for child in node.children:
443
+ if child.type == "decorator":
444
+ # Get decorator text (includes @ symbol)
445
+ dec_text = self._get_node_text(child).strip()
446
+ decorators.append(dec_text)
447
+ return decorators
448
+
449
+ def _extract_parameters(self, node) -> list[dict]:
450
+ """Extract function parameters with type annotations."""
451
+ parameters = []
452
+ for child in node.children:
453
+ if child.type == "parameters":
454
+ for param_node in child.children:
455
+ if param_node.type in (
456
+ "identifier",
457
+ "typed_parameter",
458
+ "default_parameter",
459
+ ):
460
+ param_info = {"name": None, "type": None, "default": None}
461
+
462
+ # Extract parameter name
463
+ if param_node.type == "identifier":
464
+ param_info["name"] = self._get_node_text(param_node)
465
+ else:
466
+ # For typed or default parameters, find the identifier
467
+ for subchild in param_node.children:
468
+ if subchild.type == "identifier":
469
+ param_info["name"] = self._get_node_text(subchild)
470
+ elif subchild.type == "type":
471
+ param_info["type"] = self._get_node_text(subchild)
472
+ elif "default" in subchild.type:
473
+ param_info["default"] = self._get_node_text(
474
+ subchild
475
+ )
476
+
477
+ if param_info["name"] and param_info["name"] not in (
478
+ "self",
479
+ "cls",
480
+ "(",
481
+ ")",
482
+ ",",
483
+ ):
484
+ parameters.append(param_info)
485
+ return parameters
486
+
487
+ def _extract_return_type(self, node) -> str | None:
488
+ """Extract return type annotation from function."""
489
+ for child in node.children:
490
+ if child.type == "type":
491
+ return self._get_node_text(child)
492
+ return None
493
+
494
+ def _get_node_text(self, node) -> str:
495
+ """Get text content of a node."""
496
+ if hasattr(node, "text"):
497
+ return node.text.decode("utf-8")
498
+ return ""
499
+
500
def get_supported_extensions(self) -> list[str]:
    """Return the file extensions this parser handles.

    A new list is built on every call.
    """
    extensions = [".py", ".pyw"]
    return extensions