mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -62,6 +62,11 @@ class EnhancedDidYouMeanTyper(typer.Typer):
62
62
  if click_group is None:
63
63
  return None
64
64
 
65
+ # If click_group is an integer, it's an exit code from standalone_mode=False
66
+ # Return it as-is to preserve exit code propagation
67
+ if isinstance(click_group, int):
68
+ return click_group
69
+
65
70
  # Create enhanced DYM group with original group's properties
66
71
  enhanced_group = EnhancedDidYouMeanGroup(
67
72
  name=click_group.name,
@@ -81,7 +81,7 @@ explore unfamiliar projects, and integrate with AI coding tools via MCP.
81
81
  [bold cyan]MAIN COMMANDS:[/bold cyan]
82
82
  setup 🚀 Zero-config setup (indexes + configures MCP)
83
83
  search 🔍 Semantic search (finds code by meaning)
84
- chat 🤖 LLM-powered Q&A about your code (needs API key)
84
+ chat/ask 🤖 LLM-powered Q&A about your code (needs API key)
85
85
  status 📊 Show project status
86
86
  visualize 📊 Interactive code graph
87
87
 
@@ -94,7 +94,7 @@ explore unfamiliar projects, and integrate with AI coding tools via MCP.
94
94
  mcp-vector-search search "error handling"
95
95
  mcp-vector-search search --files "*.ts" "authentication"
96
96
  mcp-vector-search chat "where is the database configured?"
97
- mcp-vector-search chat "how does auth work in this project?"
97
+ mcp-vector-search ask "how does auth work in this project?"
98
98
 
99
99
  [bold cyan]MORE COMMANDS:[/bold cyan]
100
100
  install 📦 Install project and MCP integrations
@@ -166,6 +166,9 @@ app.add_typer(search_app, name="search", help="🔍 Search code semantically")
166
166
 
167
167
  # 7.5. CHAT - LLM-powered intelligent search
168
168
  app.add_typer(chat_app, name="chat", help="🤖 Ask questions about code with LLM")
169
+ app.add_typer(
170
+ chat_app, name="ask", help="🤖 Ask questions about code with LLM (alias for chat)"
171
+ )
169
172
 
170
173
  # 8. INDEX - Index codebase
171
174
  app.add_typer(index_app, name="index", help="📇 Index codebase for semantic search")
@@ -357,7 +360,11 @@ def cli_with_suggestions():
357
360
 
358
361
  try:
359
362
  # Call the app with standalone_mode=False to get exceptions instead of sys.exit
360
- app(standalone_mode=False)
363
+ # Capture return value - when standalone_mode=False, typer.Exit returns code instead of raising
364
+ exit_code = app(standalone_mode=False)
365
+ # Propagate non-zero exit codes (e.g., from --fail-on-smell quality gate)
366
+ if exit_code is not None and exit_code != 0:
367
+ sys.exit(exit_code)
361
368
  except click.UsageError as e:
362
369
  # Check if it's a "No such command" error
363
370
  if "No such command" in str(e):
@@ -395,8 +402,12 @@ def cli_with_suggestions():
395
402
  except click.Abort:
396
403
  # User interrupted (Ctrl+C)
397
404
  sys.exit(1)
398
- except (SystemExit, click.exceptions.Exit):
399
- # Re-raise system exits and typer.Exit
405
+ except (SystemExit, click.exceptions.Exit) as e:
406
+ # Re-raise system exits and typer.Exit with their exit codes
407
+ if hasattr(e, "exit_code"):
408
+ sys.exit(e.exit_code)
409
+ elif hasattr(e, "code"):
410
+ sys.exit(e.code if e.code is not None else 0)
400
411
  raise
401
412
  except Exception as e:
402
413
  # For other exceptions, show error and exit if verbose logging is enabled
@@ -24,6 +24,46 @@ from ..core.models import ProjectInfo, SearchResult
24
24
  console = Console()
25
25
 
26
26
 
27
+ def _get_grade_color(grade: str) -> str:
28
+ """Get color for complexity grade."""
29
+ grade_colors = {
30
+ "A": "green",
31
+ "B": "cyan",
32
+ "C": "yellow",
33
+ "D": "orange",
34
+ "F": "red",
35
+ }
36
+ return grade_colors.get(grade, "white")
37
+
38
+
39
+ def _get_complexity_color(complexity: int) -> str:
40
+ """Get color based on cognitive complexity value."""
41
+ if complexity <= 5:
42
+ return "green"
43
+ elif complexity <= 10:
44
+ return "cyan"
45
+ elif complexity <= 20:
46
+ return "yellow"
47
+ elif complexity <= 30:
48
+ return "orange"
49
+ else:
50
+ return "red"
51
+
52
+
53
+ def _get_quality_color(quality: int) -> str:
54
+ """Get color based on quality score (0-100)."""
55
+ if quality >= 80:
56
+ return "green"
57
+ elif quality >= 60:
58
+ return "cyan"
59
+ elif quality >= 40:
60
+ return "yellow"
61
+ elif quality >= 20:
62
+ return "orange"
63
+ else:
64
+ return "red"
65
+
66
+
27
67
  def setup_logging(level: str = "WARNING") -> None:
28
68
  """Setup structured logging with rich formatting.
29
69
 
@@ -113,8 +153,17 @@ def print_search_results(
113
153
  query: str,
114
154
  show_content: bool = True,
115
155
  max_content_lines: int = 10,
156
+ quality_weight: float = 0.0,
116
157
  ) -> None:
117
- """Print search results in a formatted display."""
158
+ """Print search results in a formatted display with quality-aware ranking.
159
+
160
+ Args:
161
+ results: List of search results
162
+ query: Original search query
163
+ show_content: Whether to show code content
164
+ max_content_lines: Maximum lines of code to show
165
+ quality_weight: Weight for quality ranking (0.0-1.0), used to show score breakdown
166
+ """
118
167
  if not results:
119
168
  print_warning(f"No results found for query: '{query}'")
120
169
  return
@@ -122,7 +171,14 @@ def print_search_results(
122
171
  console.print(
123
172
  f"\n[bold blue]Search Results for:[/bold blue] [green]'{query}'[/green]"
124
173
  )
125
- console.print(f"[dim]Found {len(results)} results[/dim]\n")
174
+
175
+ # Show quality ranking info if enabled
176
+ if quality_weight > 0.0:
177
+ console.print(
178
+ f"[dim]Found {len(results)} results (quality-aware ranking: {quality_weight:.0%} quality, {(1 - quality_weight):.0%} relevance)[/dim]\n"
179
+ )
180
+ else:
181
+ console.print(f"[dim]Found {len(results)} results[/dim]\n")
126
182
 
127
183
  for i, result in enumerate(results, 1):
128
184
  # Create result header
@@ -132,12 +188,85 @@ def print_search_results(
132
188
  if result.class_name:
133
189
  header += f" in [yellow]{result.class_name}[/yellow]"
134
190
 
135
- # Add location and similarity
191
+ # Add location
136
192
  location = f"[dim]{result.location}[/dim]"
137
- similarity = f"[green]{result.similarity_score:.2%}[/green]"
138
193
 
139
194
  console.print(f"{header}")
140
- console.print(f" {location} | Similarity: {similarity}")
195
+
196
+ # Build metadata line with quality metrics
197
+ metadata_parts = [location]
198
+
199
+ # Show score breakdown if quality ranking is enabled
200
+ if quality_weight > 0.0 and hasattr(result, "_original_similarity"):
201
+ # Quality-aware ranking: show relevance, quality, and combined
202
+ relevance_score = result._original_similarity
203
+ combined_score = result.similarity_score
204
+ quality_score = result.quality_score or 0
205
+
206
+ metadata_parts.append(f"Relevance: [cyan]{relevance_score:.2%}[/cyan]")
207
+ metadata_parts.append(
208
+ f"Quality: [{_get_quality_color(quality_score)}]{quality_score}[/{_get_quality_color(quality_score)}]"
209
+ )
210
+ metadata_parts.append(f"Combined: [green]{combined_score:.2%}[/green]")
211
+ else:
212
+ # Pure semantic search: show only similarity score
213
+ similarity = f"[green]{result.similarity_score:.2%}[/green]"
214
+ metadata_parts.append(f"Similarity: {similarity}")
215
+
216
+ console.print(f" {' | '.join(metadata_parts)}")
217
+
218
+ # Add quality indicator line if quality metrics are available and not shown in scores
219
+ if result.complexity_grade and quality_weight == 0.0:
220
+ # Show quality metrics when not using quality ranking
221
+ quality_indicators = []
222
+
223
+ grade_color = _get_grade_color(result.complexity_grade)
224
+ quality_indicators.append(
225
+ f"Grade: [{grade_color}]{result.complexity_grade}[/{grade_color}]"
226
+ )
227
+
228
+ if result.cognitive_complexity is not None:
229
+ complexity_color = _get_complexity_color(result.cognitive_complexity)
230
+ quality_indicators.append(
231
+ f"Complexity: [{complexity_color}]{result.cognitive_complexity}[/{complexity_color}]"
232
+ )
233
+
234
+ # Show quality indicator with check/cross
235
+ smell_count = result.smell_count or 0
236
+ if smell_count == 0:
237
+ console.print(
238
+ f" [green]✓[/green] {' | '.join(quality_indicators)} | No smells"
239
+ )
240
+ else:
241
+ # List smells if available
242
+ smells_text = f"{smell_count} smells"
243
+ if result.code_smells:
244
+ smell_names = ", ".join(result.code_smells[:3]) # Show first 3
245
+ if len(result.code_smells) > 3:
246
+ smell_names += f", +{len(result.code_smells) - 3} more"
247
+ smells_text = f"{smell_count} smells: [dim]{smell_names}[/dim]"
248
+
249
+ console.print(
250
+ f" [red]✗[/red] {' | '.join(quality_indicators)} | {smells_text}"
251
+ )
252
+ elif result.complexity_grade and quality_weight > 0.0:
253
+ # When using quality ranking, show simpler quality indicator
254
+ smell_count = result.smell_count or 0
255
+ if smell_count == 0:
256
+ console.print(
257
+ f" [green]✓[/green] Grade {result.complexity_grade}, No smells"
258
+ )
259
+ else:
260
+ smells_text = (
261
+ ", ".join(result.code_smells[:3])
262
+ if result.code_smells
263
+ else f"{smell_count} smells"
264
+ )
265
+ if result.code_smells and len(result.code_smells) > 3:
266
+ smells_text += f", +{len(result.code_smells) - 3} more"
267
+ console.print(
268
+ f" [red]✗[/red] Grade {result.complexity_grade}, {smells_text}"
269
+ )
141
270
 
142
271
  # Show code content if requested
143
272
  if show_content and result.content:
@@ -55,19 +55,50 @@ class SmellThresholds:
55
55
  # High complexity
56
56
  high_complexity: int = 15
57
57
 
58
- # God class (too many methods)
58
+ # God class (too many methods and lines)
59
59
  god_class_methods: int = 20
60
+ god_class_lines: int = 500
60
61
 
61
62
  # Feature envy (placeholder for future)
62
63
  feature_envy_external_calls: int = 5
63
64
 
64
65
 
66
@dataclass
class CouplingThresholds:
    """Thresholds for coupling and instability metrics.

    The integer bands split efferent/afferent coupling counts into
    low / moderate / high / very-high ranges; the instability thresholds
    additionally map to A-F grades and to the Stable / Balanced /
    Unstable categories.  All thresholds are inclusive upper bounds for
    their band.
    """

    # Efferent coupling (Ce) thresholds -- outgoing dependencies
    efferent_low: int = 3  # Low coupling (0-3 dependencies)
    efferent_moderate: int = 7  # Moderate coupling (4-7)
    efferent_high: int = 12  # High coupling (8-12)
    # Very high: 13+

    # Afferent coupling (Ca) thresholds -- incoming dependents
    afferent_low: int = 2  # Low coupling (0-2 dependents)
    afferent_moderate: int = 5  # Moderate coupling (3-5)
    afferent_high: int = 10  # High coupling (6-10)
    # Very high: 11+

    # Instability (I) thresholds for grades; I ranges 0.0 (stable) to
    # 1.0 (unstable) -- presumably the Martin instability metric
    # I = Ce / (Ce + Ca), computed elsewhere (TODO confirm).
    instability_a: float = 0.2  # A grade: very stable (0.0-0.2)
    instability_b: float = 0.4  # B grade: stable (0.2-0.4)
    instability_c: float = 0.6  # C grade: balanced (0.4-0.6)
    instability_d: float = 0.8  # D grade: unstable (0.6-0.8)
    # F grade: very unstable (0.8-1.0)

    # Category thresholds (coarser three-way split of the same scale)
    stable_max: float = 0.3  # Stable category (0.0-0.3)
    balanced_max: float = 0.7  # Balanced category (0.3-0.7)
    # Unstable category: 0.7-1.0
93
+
94
+
65
95
  @dataclass
66
96
  class ThresholdConfig:
67
97
  """Complete threshold configuration."""
68
98
 
69
99
  complexity: ComplexityThresholds = field(default_factory=ComplexityThresholds)
70
100
  smells: SmellThresholds = field(default_factory=SmellThresholds)
101
+ coupling: CouplingThresholds = field(default_factory=CouplingThresholds)
71
102
 
72
103
  # Quality gate settings
73
104
  fail_on_f_grade: bool = True
@@ -104,6 +135,7 @@ class ThresholdConfig:
104
135
  """
105
136
  complexity_data = data.get("complexity", {})
106
137
  smells_data = data.get("smells", {})
138
+ coupling_data = data.get("coupling", {})
107
139
 
108
140
  return cls(
109
141
  complexity=(
@@ -114,6 +146,11 @@ class ThresholdConfig:
114
146
  smells=(
115
147
  SmellThresholds(**smells_data) if smells_data else SmellThresholds()
116
148
  ),
149
+ coupling=(
150
+ CouplingThresholds(**coupling_data)
151
+ if coupling_data
152
+ else CouplingThresholds()
153
+ ),
117
154
  fail_on_f_grade=data.get("fail_on_f_grade", True),
118
155
  fail_on_smell_count=data.get("fail_on_smell_count", 10),
119
156
  warn_on_d_grade=data.get("warn_on_d_grade", True),
@@ -147,8 +184,23 @@ class ThresholdConfig:
147
184
  "deep_nesting_depth": self.smells.deep_nesting_depth,
148
185
  "high_complexity": self.smells.high_complexity,
149
186
  "god_class_methods": self.smells.god_class_methods,
187
+ "god_class_lines": self.smells.god_class_lines,
150
188
  "feature_envy_external_calls": self.smells.feature_envy_external_calls,
151
189
  },
190
+ "coupling": {
191
+ "efferent_low": self.coupling.efferent_low,
192
+ "efferent_moderate": self.coupling.efferent_moderate,
193
+ "efferent_high": self.coupling.efferent_high,
194
+ "afferent_low": self.coupling.afferent_low,
195
+ "afferent_moderate": self.coupling.afferent_moderate,
196
+ "afferent_high": self.coupling.afferent_high,
197
+ "instability_a": self.coupling.instability_a,
198
+ "instability_b": self.coupling.instability_b,
199
+ "instability_c": self.coupling.instability_c,
200
+ "instability_d": self.coupling.instability_d,
201
+ "stable_max": self.coupling.stable_max,
202
+ "balanced_max": self.coupling.balanced_max,
203
+ },
152
204
  "fail_on_f_grade": self.fail_on_f_grade,
153
205
  "fail_on_smell_count": self.fail_on_smell_count,
154
206
  "warn_on_d_grade": self.warn_on_d_grade,
@@ -183,3 +235,39 @@ class ThresholdConfig:
183
235
  return "D"
184
236
  else:
185
237
  return "F"
238
+
239
def get_instability_grade(self, instability: float) -> str:
    """Grade an instability value on an A-F scale.

    Args:
        instability: Instability value (0.0-1.0).

    Returns:
        Grade from "A" (most stable) to "F" (most unstable); each
        configured threshold is an inclusive upper bound for its grade.
    """
    cfg = self.coupling
    # Table-driven lookup: first band whose upper bound contains the
    # value wins; anything above the D bound falls through to F.
    graded_bounds = (
        ("A", cfg.instability_a),
        ("B", cfg.instability_b),
        ("C", cfg.instability_c),
        ("D", cfg.instability_d),
    )
    for grade, upper in graded_bounds:
        if instability <= upper:
            return grade
    return "F"
258
+
259
def get_stability_category(self, instability: float) -> str:
    """Classify an instability value into a coarse stability category.

    Args:
        instability: Instability value (0.0-1.0).

    Returns:
        "Stable" (up to stable_max), "Balanced" (up to balanced_max),
        or "Unstable" (above balanced_max).
    """
    cfg = self.coupling
    # Guard clause for the open-ended top band, then a two-way split.
    if instability > cfg.balanced_max:
        return "Unstable"
    return "Stable" if instability <= cfg.stable_max else "Balanced"
@@ -1 +1,17 @@
1
1
  """Core functionality for MCP Vector Search."""
2
+
3
+ from mcp_vector_search.core.git import (
4
+ GitError,
5
+ GitManager,
6
+ GitNotAvailableError,
7
+ GitNotRepoError,
8
+ GitReferenceError,
9
+ )
10
+
11
+ __all__ = [
12
+ "GitError",
13
+ "GitManager",
14
+ "GitNotAvailableError",
15
+ "GitNotRepoError",
16
+ "GitReferenceError",
17
+ ]
@@ -192,7 +192,7 @@ class ChromaVectorDatabase(VectorDatabase):
192
192
  except BaseException as init_error:
193
193
  # Re-raise system exceptions we should never catch
194
194
  if isinstance(
195
- init_error, (KeyboardInterrupt, SystemExit, GeneratorExit)
195
+ init_error, KeyboardInterrupt | SystemExit | GeneratorExit
196
196
  ):
197
197
  raise
198
198
 
@@ -244,7 +244,7 @@ class ChromaVectorDatabase(VectorDatabase):
244
244
  except BaseException as retry_error:
245
245
  # Re-raise system exceptions
246
246
  if isinstance(
247
- retry_error, (KeyboardInterrupt, SystemExit, GeneratorExit)
247
+ retry_error, KeyboardInterrupt | SystemExit | GeneratorExit
248
248
  ):
249
249
  raise
250
250
 
@@ -477,6 +477,33 @@ class ChromaVectorDatabase(VectorDatabase):
477
477
 
478
478
  if similarity >= similarity_threshold:
479
479
  # Document contains the original content (no metadata appended)
480
+ # Parse code smells from JSON if present
481
+ code_smells = []
482
+ if "code_smells" in metadata:
483
+ try:
484
+ code_smells = json.loads(metadata["code_smells"])
485
+ except (json.JSONDecodeError, TypeError):
486
+ code_smells = []
487
+
488
+ # Calculate quality score from metrics (0-100 scale)
489
+ quality_score = None
490
+ if (
491
+ "cognitive_complexity" in metadata
492
+ and "smell_count" in metadata
493
+ ):
494
+ # Simple quality score: penalize complexity and smells
495
+ complexity = metadata["cognitive_complexity"]
496
+ smells = metadata["smell_count"]
497
+
498
+ # Start with 100, penalize for complexity and smells
499
+ score = 100
500
+ # Complexity penalty: -2 points per complexity unit
501
+ score -= min(50, complexity * 2)
502
+ # Smell penalty: -10 points per smell
503
+ score -= min(30, smells * 10)
504
+
505
+ quality_score = max(0, score)
506
+
480
507
  result = SearchResult(
481
508
  content=doc,
482
509
  file_path=Path(metadata["file_path"]),
@@ -488,6 +515,16 @@ class ChromaVectorDatabase(VectorDatabase):
488
515
  chunk_type=metadata.get("chunk_type", "code"),
489
516
  function_name=metadata.get("function_name") or None,
490
517
  class_name=metadata.get("class_name") or None,
518
+ # Quality metrics from structural analysis
519
+ cognitive_complexity=metadata.get("cognitive_complexity"),
520
+ cyclomatic_complexity=metadata.get("cyclomatic_complexity"),
521
+ max_nesting_depth=metadata.get("max_nesting_depth"),
522
+ parameter_count=metadata.get("parameter_count"),
523
+ lines_of_code=metadata.get("lines_of_code"),
524
+ complexity_grade=metadata.get("complexity_grade"),
525
+ code_smells=code_smells,
526
+ smell_count=metadata.get("smell_count"),
527
+ quality_score=quality_score,
491
528
  )
492
529
  search_results.append(result)
493
530
 
@@ -2,8 +2,32 @@
2
2
 
3
3
  import hashlib
4
4
  import json
5
+ import multiprocessing
6
+ import os
5
7
  from pathlib import Path
6
8
 
9
+
10
+ # Configure tokenizers parallelism based on process context
11
+ # Enable parallelism in main process for 2-4x speedup
12
+ # Disable in forked processes to avoid deadlock warnings
13
+ # See: https://github.com/huggingface/tokenizers/issues/1294
14
+ def _configure_tokenizers_parallelism() -> None:
15
+ """Configure TOKENIZERS_PARALLELISM based on process context."""
16
+ # Check if we're in the main process
17
+ is_main_process = multiprocessing.current_process().name == "MainProcess"
18
+
19
+ if is_main_process:
20
+ # Enable parallelism in main process for better performance
21
+ # This gives 2-4x speedup for embedding generation
22
+ os.environ["TOKENIZERS_PARALLELISM"] = "true"
23
+ else:
24
+ # Disable in forked processes to avoid deadlock
25
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
26
+
27
+
28
+ # Configure before importing sentence_transformers
29
+ _configure_tokenizers_parallelism()
30
+
7
31
  import aiofiles
8
32
  from loguru import logger
9
33
  from sentence_transformers import SentenceTransformer