mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions.
Potentially problematic release.
This version of mcp-vector-search might be problematic.
- mcp_vector_search/__init__.py +3 -2
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/config.py +88 -40
- mcp_vector_search/cli/commands/index.py +198 -52
- mcp_vector_search/cli/commands/init.py +471 -58
- mcp_vector_search/cli/commands/install.py +284 -0
- mcp_vector_search/cli/commands/mcp.py +495 -0
- mcp_vector_search/cli/commands/search.py +241 -87
- mcp_vector_search/cli/commands/status.py +184 -58
- mcp_vector_search/cli/commands/watch.py +34 -35
- mcp_vector_search/cli/didyoumean.py +184 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +292 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +175 -27
- mcp_vector_search/cli/output.py +63 -45
- mcp_vector_search/config/defaults.py +50 -36
- mcp_vector_search/config/settings.py +49 -35
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/connection_pool.py +322 -0
- mcp_vector_search/core/database.py +335 -25
- mcp_vector_search/core/embeddings.py +73 -29
- mcp_vector_search/core/exceptions.py +19 -2
- mcp_vector_search/core/factory.py +310 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +237 -73
- mcp_vector_search/core/models.py +21 -19
- mcp_vector_search/core/project.py +73 -58
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +574 -86
- mcp_vector_search/core/watcher.py +48 -46
- mcp_vector_search/mcp/__init__.py +4 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +701 -0
- mcp_vector_search/parsers/base.py +30 -31
- mcp_vector_search/parsers/javascript.py +74 -48
- mcp_vector_search/parsers/python.py +57 -49
- mcp_vector_search/parsers/registry.py +47 -32
- mcp_vector_search/parsers/text.py +179 -0
- mcp_vector_search/utils/__init__.py +40 -0
- mcp_vector_search/utils/gitignore.py +229 -0
- mcp_vector_search/utils/timing.py +334 -0
- mcp_vector_search/utils/version.py +47 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/METADATA +173 -7
- mcp_vector_search-0.4.12.dist-info/RECORD +54 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/search.py
CHANGED
@@ -2,10 +2,11 @@
 
 import re
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 from loguru import logger
 
+from .auto_indexer import AutoIndexer, SearchTriggeredIndexer
 from .database import VectorDatabase
 from .exceptions import SearchError
 from .models import SearchResult
@@ -18,48 +19,70 @@ class SemanticSearchEngine:
         self,
         database: VectorDatabase,
         project_root: Path,
-        similarity_threshold: float = 0.
+        similarity_threshold: float = 0.3,
+        auto_indexer: AutoIndexer | None = None,
+        enable_auto_reindex: bool = True,
     ) -> None:
         """Initialize semantic search engine.
-
+
         Args:
             database: Vector database instance
             project_root: Project root directory
             similarity_threshold: Default similarity threshold
+            auto_indexer: Optional auto-indexer for semi-automatic reindexing
+            enable_auto_reindex: Whether to enable automatic reindexing
         """
         self.database = database
         self.project_root = project_root
         self.similarity_threshold = similarity_threshold
+        self.auto_indexer = auto_indexer
+        self.enable_auto_reindex = enable_auto_reindex
+
+        # Initialize search-triggered indexer if auto-indexer is provided
+        self.search_triggered_indexer = None
+        if auto_indexer and enable_auto_reindex:
+            self.search_triggered_indexer = SearchTriggeredIndexer(auto_indexer)
 
     async def search(
         self,
         query: str,
         limit: int = 10,
-        filters:
-        similarity_threshold:
+        filters: dict[str, Any] | None = None,
+        similarity_threshold: float | None = None,
         include_context: bool = True,
-    ) ->
+    ) -> list[SearchResult]:
        """Perform semantic search for code.
-
+
         Args:
             query: Search query
             limit: Maximum number of results
             filters: Optional filters (language, file_path, etc.)
             similarity_threshold: Minimum similarity score
             include_context: Whether to include context lines
-
+
         Returns:
             List of search results
         """
         if not query.strip():
             return []
 
-
+        # Auto-reindex check before search
+        if self.search_triggered_indexer:
+            try:
+                await self.search_triggered_indexer.pre_search_hook()
+            except Exception as e:
+                logger.warning(f"Auto-reindex check failed: {e}")
+
+        threshold = (
+            similarity_threshold
+            if similarity_threshold is not None
+            else self._get_adaptive_threshold(query)
+        )
 
         try:
             # Preprocess query
             processed_query = self._preprocess_query(query)
-
+
             # Perform vector search
             results = await self.database.search(
                 query=processed_query,
@@ -77,7 +100,9 @@ class SemanticSearchEngine:
             # Apply additional ranking if needed
             ranked_results = self._rerank_results(enhanced_results, query)
 
-            logger.debug(
+            logger.debug(
+                f"Search for '{query}' with threshold {threshold:.3f} returned {len(ranked_results)} results"
+            )
             return ranked_results
 
         except Exception as e:
@@ -87,29 +112,31 @@ class SemanticSearchEngine:
     async def search_similar(
         self,
         file_path: Path,
-        function_name:
+        function_name: str | None = None,
         limit: int = 10,
-        similarity_threshold:
-    ) ->
+        similarity_threshold: float | None = None,
+    ) -> list[SearchResult]:
         """Find code similar to a specific function or file.
-
+
         Args:
             file_path: Path to the reference file
             function_name: Specific function name (optional)
             limit: Maximum number of results
             similarity_threshold: Minimum similarity score
-
+
         Returns:
             List of similar code results
         """
         try:
             # Read the reference file
-            with open(file_path,
+            with open(file_path, encoding="utf-8") as f:
                 content = f.read()
 
             # If function name is specified, try to extract just that function
             if function_name:
-                function_content = self._extract_function_content(
+                function_content = self._extract_function_content(
+                    content, function_name
+                )
                 if function_content:
                     content = function_content
 
@@ -128,27 +155,27 @@ class SemanticSearchEngine:
     async def search_by_context(
         self,
         context_description: str,
-        focus_areas:
+        focus_areas: list[str] | None = None,
         limit: int = 10,
-    ) ->
+    ) -> list[SearchResult]:
         """Search for code based on contextual description.
-
+
         Args:
             context_description: Description of what you're looking for
             focus_areas: Areas to focus on (e.g., ["security", "authentication"])
             limit: Maximum number of results
-
+
         Returns:
             List of contextually relevant results
         """
         # Build enhanced query with focus areas
         query_parts = [context_description]
-
+
         if focus_areas:
             query_parts.extend(focus_areas)
-
+
         enhanced_query = " ".join(query_parts)
-
+
         return await self.search(
             query=enhanced_query,
             limit=limit,
@@ -157,46 +184,161 @@ class SemanticSearchEngine:
 
     def _preprocess_query(self, query: str) -> str:
         """Preprocess search query for better results.
-
+
         Args:
             query: Raw search query
-
+
         Returns:
             Processed query
         """
         # Remove extra whitespace
         query = re.sub(r"\s+", " ", query.strip())
-
-        # Expand common abbreviations
+
+        # Expand common programming abbreviations and synonyms
         expansions = {
-            "auth": "authentication",
-            "db": "database",
-            "api": "application programming interface",
-            "ui": "user interface",
-            "util": "utility",
-            "config": "configuration",
+            "auth": "authentication authorize login",
+            "db": "database data storage",
+            "api": "application programming interface endpoint",
+            "ui": "user interface frontend view",
+            "util": "utility helper function",
+            "config": "configuration settings options",
+            "async": "asynchronous await promise",
+            "sync": "synchronous blocking",
+            "func": "function method",
+            "var": "variable",
+            "param": "parameter argument",
+            "init": "initialize setup create",
+            "parse": "parsing parser analyze",
+            "validate": "validation check verify",
+            "handle": "handler process manage",
+            "error": "exception failure bug",
+            "test": "testing unittest spec",
+            "mock": "mocking stub fake",
+            "log": "logging logger debug",
         }
-
+
+        # Add programming language keywords and concepts
+        programming_concepts = {
+            "class": "class object type",
+            "method": "method function procedure",
+            "property": "property attribute field",
+            "import": "import require include",
+            "export": "export module public",
+            "return": "return yield output",
+            "loop": "loop iterate for while",
+            "condition": "condition if else branch",
+            "array": "array list collection",
+            "string": "string text character",
+            "number": "number integer float",
+            "boolean": "boolean true false",
+        }
+
+        # Merge all expansions
+        all_expansions = {**expansions, **programming_concepts}
+
         words = query.lower().split()
         expanded_words = []
-

         for word in words:
-
-
-
-
-
-
+            # Add original word
+            expanded_words.append(word)
+
+            # Add expansions if available
+            if word in all_expansions:
+                expanded_words.extend(all_expansions[word].split())
+
+        # Remove duplicates while preserving order
+        seen = set()
+        unique_words = []
+        for word in expanded_words:
+            if word not in seen:
+                seen.add(word)
+                unique_words.append(word)
+
+        return " ".join(unique_words)
+
+    def _get_adaptive_threshold(self, query: str) -> float:
+        """Get adaptive similarity threshold based on query characteristics.
+
+        Args:
+            query: Search query
+
+        Returns:
+            Adaptive similarity threshold
+        """
+        base_threshold = self.similarity_threshold
+        query_lower = query.lower()
+        words = query.split()
+
+        # Adjust threshold based on query characteristics
+
+        # 1. Single word queries - lower threshold for broader results
+        if len(words) == 1:
+            return max(0.01, base_threshold - 0.29)
+
+        # 2. Very specific technical terms - lower threshold
+        technical_terms = [
+            "javascript",
+            "typescript",
+            "python",
+            "java",
+            "cpp",
+            "rust",
+            "go",
+            "function",
+            "class",
+            "method",
+            "variable",
+            "import",
+            "export",
+            "async",
+            "await",
+            "promise",
+            "callback",
+            "api",
+            "database",
+            "parser",
+            "compiler",
+            "interpreter",
+            "syntax",
+            "semantic",
+            "mcp",
+            "gateway",
+            "server",
+            "client",
+            "protocol",
+        ]
+
+        if any(term in query_lower for term in technical_terms):
+            return max(0.01, base_threshold - 0.29)
+
+        # 3. Short queries (2-3 words) - slightly lower threshold
+        if len(words) <= 3:
+            return max(0.1, base_threshold - 0.1)
+
+        # 4. Long queries (>6 words) - higher threshold for precision
+        if len(words) > 6:
+            return min(0.8, base_threshold + 0.1)
+
+        # 5. Queries with exact identifiers (CamelCase, snake_case)
+        if re.search(r"\b[A-Z][a-zA-Z]*\b", query) or "_" in query:
+            return max(0.05, base_threshold - 0.25)
+
+        # 6. Common programming patterns
+        if any(pattern in query for pattern in ["()", ".", "->", "=>", "::"]):
+            return max(0.25, base_threshold - 0.1)
+
+        return base_threshold
 
     async def _enhance_result(
         self, result: SearchResult, include_context: bool
     ) -> SearchResult:
         """Enhance search result with additional information.
-
+
         Args:
             result: Original search result
             include_context: Whether to include context lines
-
+
         Returns:
             Enhanced search result
         """
@@ -205,7 +347,7 @@ class SemanticSearchEngine:
 
         try:
             # Read the source file to get context
-            with open(result.file_path,
+            with open(result.file_path, encoding="utf-8") as f:
                 lines = f.readlines()
 
             # Get context lines before and after
@@ -216,9 +358,7 @@ class SemanticSearchEngine:
             context_before = [
                 line.rstrip() for line in lines[start_idx : result.start_line - 1]
             ]
-            context_after = [
-                line.rstrip() for line in lines[result.end_line : end_idx]
-            ]
+            context_after = [line.rstrip() for line in lines[result.end_line : end_idx]]
 
             # Update result with context
             result.context_before = context_before
@@ -230,66 +370,414 @@ class SemanticSearchEngine:
         return result
 
     def _rerank_results(
-        self, results:
-    ) ->
-        """Apply
-
+        self, results: list[SearchResult], query: str
+    ) -> list[SearchResult]:
+        """Apply advanced ranking to search results using multiple factors.
+
         Args:
             results: Original search results
             query: Original search query
-
+
         Returns:
             Reranked search results
         """
-
+        if not results:
+            return results
+
         query_lower = query.lower()
-
+        query_words = set(query_lower.split())
+
+        for result in results:
+            # Start with base similarity score
+            score = result.similarity_score
+
+            # Factor 1: Exact matches in identifiers (high boost)
+            if result.function_name:
+                func_name_lower = result.function_name.lower()
+                if query_lower in func_name_lower:
+                    score += 0.15  # Strong boost for function name match
+                # Partial word matches
+                for word in query_words:
+                    if word in func_name_lower:
+                        score += 0.05
+
+            if result.class_name:
+                class_name_lower = result.class_name.lower()
+                if query_lower in class_name_lower:
+                    score += 0.15  # Strong boost for class name match
+                # Partial word matches
+                for word in query_words:
+                    if word in class_name_lower:
+                        score += 0.05
+
+            # Factor 2: File name relevance
+            file_name_lower = result.file_path.name.lower()
+            if query_lower in file_name_lower:
+                score += 0.08
+            for word in query_words:
+                if word in file_name_lower:
+                    score += 0.03
+
+            # Factor 3: Content density (how many query words appear)
+            content_lower = result.content.lower()
+            word_matches = sum(1 for word in query_words if word in content_lower)
+            if word_matches > 0:
+                density_boost = (word_matches / len(query_words)) * 0.1
+                score += density_boost
+
+            # Factor 4: Code structure preferences
+            # Boost functions over general code blocks
+            if result.chunk_type == "function":
+                score += 0.05
+            elif result.chunk_type == "class":
+                score += 0.03
+
+            # Factor 5: File type preferences (prefer source files over tests/docs)
+            file_ext = result.file_path.suffix.lower()
+            if file_ext in [".py", ".js", ".ts", ".java", ".cpp", ".c", ".go", ".rs"]:
+                score += 0.02
+            elif "test" in result.file_path.name.lower():
+                score -= 0.02  # Slightly penalize test files unless specifically searching for tests
+
+            # Factor 6: Recency bias (prefer shorter file paths - often more core files)
+            path_depth = len(result.file_path.parts)
+            if path_depth <= 3:
+                score += 0.02
+            elif path_depth > 5:
+                score -= 0.01
+
+            # Ensure score doesn't exceed 1.0
+            result.similarity_score = min(1.0, score)
+
+        # Sort by enhanced similarity score
+        results.sort(key=lambda r: r.similarity_score, reverse=True)
+
+        # Update ranks
+        for i, result in enumerate(results):
+            result.rank = i + 1
+
+        return results
+
+    def analyze_query(self, query: str) -> dict[str, Any]:
+        """Analyze search query and provide suggestions for improvement.
+
+        Args:
+            query: Search query to analyze
+
+        Returns:
+            Dictionary with analysis results and suggestions
+        """
+        analysis = {
+            "original_query": query,
+            "processed_query": self._preprocess_query(query),
+            "query_type": "general",
+            "suggestions": [],
+            "confidence": "medium",
+        }
+
+        query_lower = query.lower()
+
+        # Detect query type
+        if any(word in query_lower for word in ["function", "method", "def", "func"]):
+            analysis["query_type"] = "function_search"
+            analysis["suggestions"].append(
+                "Try searching for specific function names or patterns"
+            )
+        elif any(word in query_lower for word in ["class", "object", "type"]):
+            analysis["query_type"] = "class_search"
+            analysis["suggestions"].append(
+                "Include class inheritance or interface information"
+            )
+        elif any(word in query_lower for word in ["error", "exception", "bug", "fix"]):
+            analysis["query_type"] = "error_handling"
+            analysis["suggestions"].append("Include error types or exception names")
+        elif any(word in query_lower for word in ["test", "spec", "mock"]):
+            analysis["query_type"] = "testing"
+            analysis["suggestions"].append("Specify test framework or testing patterns")
+        elif any(word in query_lower for word in ["config", "setting", "option"]):
+            analysis["query_type"] = "configuration"
+            analysis["suggestions"].append(
+                "Include configuration file types or setting names"
+            )
+
+        # Analyze query complexity
+        words = query.split()
+        if len(words) == 1:
+            analysis["confidence"] = "low"
+            analysis["suggestions"].append(
+                "Try adding more descriptive words for better results"
+            )
+        elif len(words) > 10:
+            analysis["confidence"] = "low"
+            analysis["suggestions"].append(
+                "Consider simplifying your query for better matching"
+            )
+        else:
+            analysis["confidence"] = "high"
+
+        # Check for common programming patterns
+        if re.search(r"\b\w+\(\)", query):
+            analysis["suggestions"].append(
+                "Function call detected - searching for function definitions"
+            )
+        if re.search(r"\b[A-Z][a-zA-Z]*\b", query):
+            analysis["suggestions"].append(
+                "CamelCase detected - searching for class or type names"
+            )
+        if re.search(r"\b\w+\.\w+", query):
+            analysis["suggestions"].append(
+                "Dot notation detected - searching for method calls or properties"
+            )
+
+        return analysis
+
+    def suggest_related_queries(
+        self, query: str, results: list[SearchResult]
+    ) -> list[str]:
+        """Suggest related queries based on search results.
+
+        Args:
+            query: Original search query
+            results: Search results
+
+        Returns:
+            List of suggested related queries
+        """
+        suggestions = []
+
+        if not results:
+            # No results - suggest broader queries
+            words = query.lower().split()
+            if len(words) > 1:
+                # Try individual words
+                suggestions.extend(words[:3])  # Top 3 words
+
+            # Suggest common related terms
+            related_terms = {
+                "auth": ["login", "user", "session", "token"],
+                "database": ["query", "model", "schema", "connection"],
+                "api": ["endpoint", "request", "response", "handler"],
+                "test": ["mock", "assert", "spec", "unit"],
+                "error": ["exception", "handle", "catch", "debug"],
+            }
+
+            for word in words:
+                if word in related_terms:
+                    suggestions.extend(related_terms[word][:2])
+        else:
+            # Extract common patterns from results
+            function_names = [r.function_name for r in results if r.function_name]
+            class_names = [r.class_name for r in results if r.class_name]
+
+            # Suggest function names
+            if function_names:
+                unique_functions = list(set(function_names))[:3]
+                suggestions.extend(unique_functions)
+
+            # Suggest class names
+            if class_names:
+                unique_classes = list(set(class_names))[:3]
+                suggestions.extend(unique_classes)
+
+            # Suggest file-based queries
+            file_patterns = set()
+            for result in results[:5]:  # Top 5 results
+                file_name = result.file_path.stem
+                if "_" in file_name:
+                    file_patterns.update(file_name.split("_"))
+                elif file_name not in suggestions:
+                    file_patterns.add(file_name)
+
+            suggestions.extend(list(file_patterns)[:3])
+
+        # Remove duplicates and original query words
+        query_words = set(query.lower().split())
+        unique_suggestions = []
+        for suggestion in suggestions:
+            if (
+                suggestion
+                and suggestion.lower() not in query_words
+                and suggestion not in unique_suggestions
+            ):
+                unique_suggestions.append(suggestion)
+
+        return unique_suggestions[:5]  # Return top 5 suggestions
+
+    async def search_with_context(
+        self,
+        query: str,
+        context_files: list[Path] | None = None,
+        limit: int = 10,
+        similarity_threshold: float | None = None,
+    ) -> dict[str, Any]:
+        """Enhanced search with contextual analysis and suggestions.
+
+        Args:
+            query: Search query
+            context_files: Optional list of files to provide context
+            limit: Maximum number of results
+            similarity_threshold: Minimum similarity score
+
+        Returns:
+            Dictionary with results, analysis, and suggestions
+        """
+        # Analyze the query
+        query_analysis = self.analyze_query(query)
+
+        # Perform the search
+        results = await self.search(
+            query=query,
+            limit=limit,
+            similarity_threshold=similarity_threshold,
+            include_context=True,
+        )
+
+        # Get related query suggestions
+        suggestions = self.suggest_related_queries(query, results)
+
+        # Enhance results with additional context if context files provided
+        if context_files:
+            results = await self._enhance_with_file_context(results, context_files)
+
+        # Calculate result quality metrics
+        quality_metrics = self._calculate_result_quality(results, query)
+
+        return {
+            "query": query,
+            "analysis": query_analysis,
+            "results": results,
+            "suggestions": suggestions,
+            "metrics": quality_metrics,
+            "total_results": len(results),
+        }
+
+    async def _enhance_with_file_context(
+        self, results: list[SearchResult], context_files: list[Path]
+    ) -> list[SearchResult]:
+        """Enhance results by considering context from specific files.
+
+        Args:
+            results: Original search results
+            context_files: Files to use for context
+
+        Returns:
+            Enhanced search results
+        """
+        # Read context from files
+        context_content = []
+        for file_path in context_files:
+            try:
+                with open(file_path, encoding="utf-8") as f:
+                    content = f.read()
+                    context_content.append(content)
+            except Exception as e:
+                logger.warning(f"Failed to read context file {file_path}: {e}")
+
+        if not context_content:
+            return results
+
+        # Boost results that are related to context files
+        context_text = " ".join(context_content).lower()
+
         for result in results:
-            #
-
-
-
-
-            if result.
-
-
-
-
-
-
-
-
-
-        # Re-sort by
+            # Check if result is from one of the context files
+            if result.file_path in context_files:
+                result.similarity_score = min(1.0, result.similarity_score + 0.1)
+
+            # Check if result content relates to context
+            result.content.lower()
+            if result.function_name:
+                func_name_lower = result.function_name.lower()
+                if func_name_lower in context_text:
+                    result.similarity_score = min(1.0, result.similarity_score + 0.05)
+
+            if result.class_name:
+                class_name_lower = result.class_name.lower()
+                if class_name_lower in context_text:
+                    result.similarity_score = min(1.0, result.similarity_score + 0.05)
+
+        # Re-sort by updated scores
         results.sort(key=lambda r: r.similarity_score, reverse=True)
-
+
         # Update ranks
         for i, result in enumerate(results):
             result.rank = i + 1
-
+
         return results
 
-    def
+    def _calculate_result_quality(
+        self, results: list[SearchResult], query: str
+    ) -> dict[str, Any]:
+        """Calculate quality metrics for search results.
+
+        Args:
+            results: Search results
+            query: Original query
+
+        Returns:
+            Dictionary with quality metrics
+        """
+        if not results:
+            return {
+                "average_score": 0.0,
+                "score_distribution": {},
+                "diversity": 0.0,
+                "coverage": 0.0,
+            }
+
+        # Calculate average similarity score
+        scores = [r.similarity_score for r in results]
+        avg_score = sum(scores) / len(scores)
+
+        # Score distribution
+        high_quality = sum(1 for s in scores if s >= 0.8)
+        medium_quality = sum(1 for s in scores if 0.6 <= s < 0.8)
+        low_quality = sum(1 for s in scores if s < 0.6)
+
+        # Diversity (unique files)
+        unique_files = len({r.file_path for r in results})
+        diversity = unique_files / len(results) if results else 0.0
+
+        # Coverage (how many query words are covered)
+        query_words = set(query.lower().split())
+        covered_words = set()
+        for result in results:
+            content_words = set(result.content.lower().split())
+            covered_words.update(query_words.intersection(content_words))
+
+        coverage = len(covered_words) / len(query_words) if query_words else 0.0
+
+        return {
+            "average_score": round(avg_score, 3),
+            "score_distribution": {
+                "high_quality": high_quality,
+                "medium_quality": medium_quality,
+                "low_quality": low_quality,
+            },
+            "diversity": round(diversity, 3),
+            "coverage": round(coverage, 3),
+        }
+
+    def _extract_function_content(self, content: str, function_name: str) -> str | None:
         """Extract content of a specific function from code.
-
+
         Args:
             content: Full file content
             function_name: Name of function to extract
-
+
         Returns:
             Function content if found, None otherwise
         """
         # Simple regex-based extraction (could be improved with AST)
         pattern = rf"^\s*def\s+{re.escape(function_name)}\s*\("
         lines = content.splitlines()
-
+
         for i, line in enumerate(lines):
             if re.match(pattern, line):
                 # Found function start, now find the end
                 start_line = i
                 indent_level = len(line) - len(line.lstrip())
-
+
                 # Find end of function
                 end_line = len(lines)
                 for j in range(i + 1, len(lines)):
@@ -298,27 +786,27 @@ class SemanticSearchEngine:
                    if current_indent <= indent_level:
                        end_line = j
                        break
-
+
                return "\n".join(lines[start_line:end_line])
-
+
        return None
 
-    async def get_search_stats(self) ->
+    async def get_search_stats(self) -> dict[str, Any]:
        """Get search engine statistics.
-
+
        Returns:
            Dictionary with search statistics
        """
        try:
            db_stats = await self.database.get_stats()
-
+
            return {
                "total_chunks": db_stats.total_chunks,
                "languages": db_stats.languages,
                "similarity_threshold": self.similarity_threshold,
                "project_root": str(self.project_root),
            }
-
+
        except Exception as e:
            logger.error(f"Failed to get search stats: {e}")
            return {"error": str(e)}