PyPI - mcp-vector-search - Versions diffs - 0.7.6__py3-none-any.whl → 0.8.2__py3-none-any.whl - Mend

mcp-vector-search 0.7.6__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (14) hide show

mcp_vector_search/core/indexer.py CHANGED Viewed

@@ -295,8 +295,11 @@ class SemanticIndexer:
                 logger.debug(f"No chunks extracted from {file_path}")
                 return True  # Not an error, just empty file
+            # Build hierarchical relationships between chunks
+            chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
             # Add chunks to database
-            await self.database.add_chunks(chunks)
+            await self.database.add_chunks(chunks_with_hierarchy)
             # Update metadata after successful indexing
             metadata = self._load_index_metadata()
@@ -710,8 +713,11 @@ class SemanticIndexer:
                     chunks = await self._parse_file(file_path)
                     if chunks:
+                        # Build hierarchical relationships
+                        chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
                         # Add chunks to database
-                        await self.database.add_chunks(chunks)
+                        await self.database.add_chunks(chunks_with_hierarchy)
                         chunks_added = len(chunks)
                         logger.debug(f"Indexed {chunks_added} chunks from {file_path}")
@@ -721,11 +727,86 @@ class SemanticIndexer:
                     metadata[str(file_path)] = os.path.getmtime(file_path)
                 except Exception as e:
-                    logger.error(f"Failed to index file {file_path}: {e}")
+                    error_msg = f"Failed to index file {file_path}: {type(e).__name__}: {str(e)}"
+                    logger.error(error_msg)
                     success = False
+                    # Save error to error log file
+                    try:
+                        error_log_path = self.project_root / ".mcp-vector-search" / "indexing_errors.log"
+                        with open(error_log_path, "a", encoding="utf-8") as f:
+                            from datetime import datetime
+                            timestamp = datetime.now().isoformat()
+                            f.write(f"[{timestamp}] {error_msg}\n")
+                    except Exception as log_err:
+                        logger.debug(f"Failed to write error log: {log_err}")
                 # Yield progress update
                 yield (file_path, chunks_added, success)
         # Save metadata at the end
         self._save_index_metadata(metadata)
+    def _build_chunk_hierarchy(self, chunks: list[CodeChunk]) -> list[CodeChunk]:
+        """Build parent-child relationships between chunks.
+        Logic:
+        - Module chunks (chunk_type="module") have depth 0
+        - Class chunks have depth 1, parent is module
+        - Method chunks have depth 2, parent is class
+        - Function chunks outside classes have depth 1, parent is module
+        - Nested classes increment depth
+        Args:
+            chunks: List of code chunks to process
+        Returns:
+            List of chunks with hierarchy relationships established
+        """
+        if not chunks:
+            return chunks
+        # Group chunks by type and name
+        module_chunks = [c for c in chunks if c.chunk_type in ("module", "imports")]
+        class_chunks = [c for c in chunks if c.chunk_type in ("class", "interface", "mixin")]
+        function_chunks = [c for c in chunks if c.chunk_type in ("function", "method", "constructor")]
+        # Build relationships
+        for func in function_chunks:
+            if func.class_name:
+                # Find parent class
+                parent_class = next(
+                    (c for c in class_chunks if c.class_name == func.class_name),
+                    None
+                )
+                if parent_class:
+                    func.parent_chunk_id = parent_class.chunk_id
+                    func.chunk_depth = parent_class.chunk_depth + 1
+                    if func.chunk_id not in parent_class.child_chunk_ids:
+                        parent_class.child_chunk_ids.append(func.chunk_id)
+            else:
+                # Top-level function
+                if not func.chunk_depth:
+                    func.chunk_depth = 1
+                # Link to module if exists
+                if module_chunks and not func.parent_chunk_id:
+                    func.parent_chunk_id = module_chunks[0].chunk_id
+                    if func.chunk_id not in module_chunks[0].child_chunk_ids:
+                        module_chunks[0].child_chunk_ids.append(func.chunk_id)
+        for cls in class_chunks:
+            # Classes without parent are top-level (depth 1)
+            if not cls.chunk_depth:
+                cls.chunk_depth = 1
+            # Link to module if exists
+            if module_chunks and not cls.parent_chunk_id:
+                cls.parent_chunk_id = module_chunks[0].chunk_id
+                if cls.chunk_id not in module_chunks[0].child_chunk_ids:
+                    module_chunks[0].child_chunk_ids.append(cls.chunk_id)
+        # Module chunks stay at depth 0
+        for mod in module_chunks:
+            if not mod.chunk_depth:
+                mod.chunk_depth = 0
+        return chunks

mcp_vector_search/core/models.py CHANGED Viewed

@@ -21,12 +21,40 @@ class CodeChunk:
     class_name: str | None = None
     docstring: str | None = None
     imports: list[str] = None
+    # Enhancement 1: Complexity scoring
     complexity_score: float = 0.0
+    # Enhancement 3: Hierarchical relationships
+    chunk_id: str | None = None
+    parent_chunk_id: str | None = None
+    child_chunk_ids: list[str] = None
+    chunk_depth: int = 0
+    # Enhancement 4: Enhanced metadata
+    decorators: list[str] = None
+    parameters: list[dict] = None
+    return_type: str | None = None
+    type_annotations: dict[str, str] = None
     def __post_init__(self) -> None:
-        """Initialize default values."""
+        """Initialize default values and generate chunk ID."""
         if self.imports is None:
             self.imports = []
+        if self.child_chunk_ids is None:
+            self.child_chunk_ids = []
+        if self.decorators is None:
+            self.decorators = []
+        if self.parameters is None:
+            self.parameters = []
+        if self.type_annotations is None:
+            self.type_annotations = {}
+        # Generate chunk ID if not provided
+        if self.chunk_id is None:
+            import hashlib
+            id_string = f"{self.file_path}:{self.chunk_type}:{self.start_line}:{self.end_line}"
+            self.chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
     @property
     def id(self) -> str:
@@ -52,6 +80,14 @@ class CodeChunk:
             "docstring": self.docstring,
             "imports": self.imports,
             "complexity_score": self.complexity_score,
+            "chunk_id": self.chunk_id,
+            "parent_chunk_id": self.parent_chunk_id,
+            "child_chunk_ids": self.child_chunk_ids,
+            "chunk_depth": self.chunk_depth,
+            "decorators": self.decorators,
+            "parameters": self.parameters,
+            "return_type": self.return_type,
+            "type_annotations": self.type_annotations,
         }
     @classmethod
@@ -69,6 +105,14 @@ class CodeChunk:
             docstring=data.get("docstring"),
             imports=data.get("imports", []),
             complexity_score=data.get("complexity_score", 0.0),
+            chunk_id=data.get("chunk_id"),
+            parent_chunk_id=data.get("parent_chunk_id"),
+            child_chunk_ids=data.get("child_chunk_ids", []),
+            chunk_depth=data.get("chunk_depth", 0),
+            decorators=data.get("decorators", []),
+            parameters=data.get("parameters", []),
+            return_type=data.get("return_type"),
+            type_annotations=data.get("type_annotations", {}),
         )

mcp_vector_search/parsers/base.py CHANGED Viewed

@@ -64,6 +64,68 @@ class BaseParser(ABC):
         """
         ...
+    def _calculate_complexity(self, node, language: str | None = None) -> float:
+        """Calculate cyclomatic complexity from AST node.
+        Cyclomatic complexity = Number of decision points + 1
+        Args:
+            node: AST node (tree-sitter)
+            language: Programming language for language-specific patterns (defaults to self.language)
+        Returns:
+            Complexity score (1.0 = simple, 10+ = complex)
+        """
+        if language is None:
+            language = self.language
+        if not hasattr(node, 'children'):
+            return 1.0
+        complexity = 1.0  # Base complexity
+        # Language-specific decision node types
+        decision_nodes = {
+            "python": {
+                "if_statement", "elif_clause", "while_statement", "for_statement",
+                "except_clause", "with_statement", "conditional_expression",
+                "boolean_operator"  # and, or
+            },
+            "javascript": {
+                "if_statement", "while_statement", "for_statement", "for_in_statement",
+                "switch_case", "catch_clause", "conditional_expression", "ternary_expression"
+            },
+            "typescript": {
+                "if_statement", "while_statement", "for_statement", "for_in_statement",
+                "switch_case", "catch_clause", "conditional_expression", "ternary_expression"
+            },
+            "dart": {
+                "if_statement", "while_statement", "for_statement", "for_in_statement",
+                "switch_case", "catch_clause", "conditional_expression"
+            },
+            "php": {
+                "if_statement", "elseif_clause", "while_statement", "foreach_statement",
+                "for_statement", "switch_case", "catch_clause", "ternary_expression"
+            },
+            "ruby": {
+                "if", "unless", "while", "until", "for", "case", "rescue",
+                "conditional"
+            }
+        }
+        nodes_to_count = decision_nodes.get(language, decision_nodes.get("python", set()))
+        def count_decision_points(n):
+            nonlocal complexity
+            if hasattr(n, 'type') and n.type in nodes_to_count:
+                complexity += 1
+            if hasattr(n, 'children'):
+                for child in n.children:
+                    count_decision_points(child)
+        count_decision_points(node)
+        return complexity
     def _create_chunk(
         self,
         content: str,
@@ -74,6 +136,13 @@ class BaseParser(ABC):
         function_name: str | None = None,
         class_name: str | None = None,
         docstring: str | None = None,
+        complexity_score: float = 0.0,
+        decorators: list[str] | None = None,
+        parameters: list[dict] | None = None,
+        return_type: str | None = None,
+        chunk_id: str | None = None,
+        parent_chunk_id: str | None = None,
+        chunk_depth: int = 0,
     ) -> CodeChunk:
         """Create a code chunk with metadata.
@@ -86,6 +155,13 @@ class BaseParser(ABC):
             function_name: Function name if applicable
             class_name: Class name if applicable
             docstring: Docstring if applicable
+            complexity_score: Cyclomatic complexity score
+            decorators: List of decorators/annotations
+            parameters: List of function parameters with metadata
+            return_type: Return type annotation
+            chunk_id: Unique chunk identifier
+            parent_chunk_id: Parent chunk ID for hierarchical relationships
+            chunk_depth: Nesting level in code hierarchy
         Returns:
             CodeChunk instance
@@ -100,6 +176,13 @@ class BaseParser(ABC):
             function_name=function_name,
             class_name=class_name,
             docstring=docstring,
+            complexity_score=complexity_score,
+            decorators=decorators or [],
+            parameters=parameters or [],
+            return_type=return_type,
+            chunk_id=chunk_id,
+            parent_chunk_id=parent_chunk_id,
+            chunk_depth=chunk_depth,
         )
     def _split_into_lines(self, content: str) -> list[str]:

mcp_vector_search/parsers/javascript.py CHANGED Viewed

@@ -10,11 +10,32 @@ from .base import BaseParser
 class JavaScriptParser(BaseParser):
-    """JavaScript/TypeScript parser with fallback regex-based parsing."""
+    """JavaScript parser with tree-sitter AST support and fallback regex parsing."""
     def __init__(self, language: str = "javascript") -> None:
         """Initialize JavaScript parser."""
         super().__init__(language)
+        self._parser = None
+        self._language = None
+        self._use_tree_sitter = False
+        self._initialize_parser()
+    def _initialize_parser(self) -> None:
+        """Initialize Tree-sitter parser for JavaScript."""
+        try:
+            from tree_sitter_language_pack import get_language, get_parser
+            self._language = get_language("javascript")
+            self._parser = get_parser("javascript")
+            logger.debug(
+                "JavaScript Tree-sitter parser initialized via tree-sitter-language-pack"
+            )
+            self._use_tree_sitter = True
+            return
+        except Exception as e:
+            logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")
+            self._use_tree_sitter = False
     async def parse_file(self, file_path: Path) -> list[CodeChunk]:
         """Parse a JavaScript/TypeScript file and extract code chunks."""
@@ -31,7 +52,317 @@ class JavaScriptParser(BaseParser):
         if not content.strip():
             return []
-        return await self._regex_parse(content, file_path)
+        if self._use_tree_sitter:
+            try:
+                tree = self._parser.parse(content.encode('utf-8'))
+                return self._extract_chunks_from_tree(tree, content, file_path)
+            except Exception as e:
+                logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")
+                return await self._regex_parse(content, file_path)
+        else:
+            return await self._regex_parse(content, file_path)
+    def _extract_chunks_from_tree(
+        self, tree, content: str, file_path: Path
+    ) -> list[CodeChunk]:
+        """Extract code chunks from JavaScript AST."""
+        chunks = []
+        lines = self._split_into_lines(content)
+        def visit_node(node, current_class=None):
+            """Recursively visit AST nodes."""
+            node_type = node.type
+            if node_type == "function_declaration":
+                chunks.extend(self._extract_function(node, lines, file_path, current_class))
+            elif node_type == "arrow_function":
+                chunks.extend(self._extract_arrow_function(node, lines, file_path, current_class))
+            elif node_type == "class_declaration":
+                class_chunks = self._extract_class(node, lines, file_path)
+                chunks.extend(class_chunks)
+                # Visit class methods
+                class_name = self._get_node_name(node)
+                for child in node.children:
+                    visit_node(child, class_name)
+            elif node_type == "method_definition":
+                chunks.extend(self._extract_method(node, lines, file_path, current_class))
+            elif node_type == "lexical_declaration":
+                # const/let declarations might be arrow functions
+                chunks.extend(self._extract_variable_function(node, lines, file_path, current_class))
+            # Recurse into children
+            if hasattr(node, 'children'):
+                for child in node.children:
+                    if child.type not in ("class_declaration", "function_declaration"):
+                        visit_node(child, current_class)
+        visit_node(tree.root_node)
+        # If no specific chunks found, create a single chunk for the whole file
+        if not chunks:
+            chunks.append(
+                self._create_chunk(
+                    content=content,
+                    file_path=file_path,
+                    start_line=1,
+                    end_line=len(lines),
+                    chunk_type="module",
+                )
+            )
+        return chunks
+    def _extract_function(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract function declaration from AST."""
+        function_name = self._get_node_name(node)
+        if not function_name:
+            return []
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+        # Extract parameters
+        parameters = self._extract_js_parameters(node)
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="function",
+            function_name=function_name,
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            parameters=parameters,
+            chunk_depth=2 if class_name else 1,
+        )
+        return [chunk]
+    def _extract_arrow_function(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract arrow function from AST."""
+        # Arrow functions often don't have explicit names, try to get from parent
+        parent = getattr(node, 'parent', None)
+        function_name = None
+        if parent and parent.type == "variable_declarator":
+            function_name = self._get_node_name(parent)
+        if not function_name:
+            return []
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+        # Extract parameters
+        parameters = self._extract_js_parameters(node)
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="function",
+            function_name=function_name,
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            parameters=parameters,
+            chunk_depth=2 if class_name else 1,
+        )
+        return [chunk]
+    def _extract_variable_function(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract function from variable declaration (const func = ...)."""
+        chunks = []
+        for child in node.children:
+            if child.type == "variable_declarator":
+                # Check if it's a function assignment
+                for subchild in child.children:
+                    if subchild.type in ("arrow_function", "function"):
+                        func_name = self._get_node_name(child)
+                        if func_name:
+                            start_line = child.start_point[0] + 1
+                            end_line = child.end_point[0] + 1
+                            content = self._get_line_range(lines, start_line, end_line)
+                            docstring = self._extract_jsdoc_from_node(child, lines)
+                            # Calculate complexity
+                            complexity = self._calculate_complexity(subchild, "javascript")
+                            # Extract parameters
+                            parameters = self._extract_js_parameters(subchild)
+                            chunk = self._create_chunk(
+                                content=content,
+                                file_path=file_path,
+                                start_line=start_line,
+                                end_line=end_line,
+                                chunk_type="function",
+                                function_name=func_name,
+                                class_name=class_name,
+                                docstring=docstring,
+                                complexity_score=complexity,
+                                parameters=parameters,
+                                chunk_depth=2 if class_name else 1,
+                            )
+                            chunks.append(chunk)
+        return chunks
+    def _extract_class(
+        self, node, lines: list[str], file_path: Path
+    ) -> list[CodeChunk]:
+        """Extract class declaration from AST."""
+        class_name = self._get_node_name(node)
+        if not class_name:
+            return []
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="class",
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            chunk_depth=1,
+        )
+        return [chunk]
+    def _extract_method(
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
+        """Extract method definition from class."""
+        method_name = self._get_node_name(node)
+        if not method_name:
+            return []
+        start_line = node.start_point[0] + 1
+        end_line = node.end_point[0] + 1
+        content = self._get_line_range(lines, start_line, end_line)
+        docstring = self._extract_jsdoc_from_node(node, lines)
+        # Calculate complexity
+        complexity = self._calculate_complexity(node, "javascript")
+        # Extract parameters
+        parameters = self._extract_js_parameters(node)
+        # Check for decorators (TypeScript)
+        decorators = self._extract_decorators_from_node(node)
+        chunk = self._create_chunk(
+            content=content,
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            chunk_type="method",
+            function_name=method_name,
+            class_name=class_name,
+            docstring=docstring,
+            complexity_score=complexity,
+            parameters=parameters,
+            decorators=decorators,
+            chunk_depth=2,
+        )
+        return [chunk]
+    def _get_node_name(self, node) -> str | None:
+        """Extract name from a named node."""
+        for child in node.children:
+            if child.type in ("identifier", "property_identifier"):
+                return child.text.decode("utf-8")
+        return None
+    def _get_node_text(self, node) -> str:
+        """Get text content of a node."""
+        if hasattr(node, 'text'):
+            return node.text.decode('utf-8')
+        return ""
+    def _extract_js_parameters(self, node) -> list[dict]:
+        """Extract function parameters from JavaScript/TypeScript AST."""
+        parameters = []
+        for child in node.children:
+            if child.type == "formal_parameters":
+                for param_node in child.children:
+                    if param_node.type in ("identifier", "required_parameter", "optional_parameter", "rest_parameter"):
+                        param_info = {
+                            "name": None,
+                            "type": None,
+                            "default": None
+                        }
+                        # Extract parameter details
+                        if param_node.type == "identifier":
+                            param_info["name"] = self._get_node_text(param_node)
+                        else:
+                            # TypeScript typed parameters
+                            for subchild in param_node.children:
+                                if subchild.type == "identifier":
+                                    param_info["name"] = self._get_node_text(subchild)
+                                elif subchild.type == "type_annotation":
+                                    param_info["type"] = self._get_node_text(subchild)
+                                elif "default" in subchild.type or subchild.type == "number":
+                                    param_info["default"] = self._get_node_text(subchild)
+                        if param_info["name"] and param_info["name"] not in ("(", ")", ",", "..."):
+                            # Clean up rest parameters
+                            if param_info["name"].startswith("..."):
+                                param_info["name"] = param_info["name"][3:]
+                                param_info["rest"] = True
+                            parameters.append(param_info)
+        return parameters
+    def _extract_decorators_from_node(self, node) -> list[str]:
+        """Extract decorators from TypeScript node."""
+        decorators = []
+        for child in node.children:
+            if child.type == "decorator":
+                decorators.append(self._get_node_text(child))
+        return decorators
+    def _extract_jsdoc_from_node(self, node, lines: list[str]) -> str | None:
+        """Extract JSDoc comment from before a node."""
+        start_line = node.start_point[0]
+        return self._extract_jsdoc(lines, start_line + 1)
     async def _regex_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
         """Parse JavaScript/TypeScript using regex patterns."""
@@ -262,3 +593,20 @@ class TypeScriptParser(JavaScriptParser):
     def __init__(self) -> None:
         """Initialize TypeScript parser."""
         super().__init__("typescript")
+    def _initialize_parser(self) -> None:
+        """Initialize Tree-sitter parser for TypeScript."""
+        try:
+            from tree_sitter_language_pack import get_language, get_parser
+            self._language = get_language("typescript")
+            self._parser = get_parser("typescript")
+            logger.debug(
+                "TypeScript Tree-sitter parser initialized via tree-sitter-language-pack"
+            )
+            self._use_tree_sitter = True
+            return
+        except Exception as e:
+            logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")
+            self._use_tree_sitter = False

mcp-vector-search 0.7.6__py3-none-any.whl → 0.8.2__py3-none-any.whl

Potentially problematic release.

mcp-vector-search 0.7.6__py3-none-any.whl → 0.8.2__py3-none-any.whl