mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -11,6 +11,7 @@ from typing import Any
 from loguru import logger
 from rich.console import Console
 
+from ....analysis.trends import TrendTracker
 from ....core.database import ChromaVectorDatabase
 from ....core.directory_index import DirectoryIndex
 from ....core.project import ProjectManager
@@ -19,6 +20,77 @@ from .state_manager import VisualizationState
 console = Console()
 
 
+def extract_chunk_name(content: str, fallback: str = "chunk") -> str:
+    """Extract first meaningful word from chunk content for labeling.
+
+    Args:
+        content: The chunk's code content
+        fallback: Fallback name if no meaningful word found
+
+    Returns:
+        First meaningful identifier found in the content
+
+    Examples:
+        >>> extract_chunk_name("def calculate_total(...)")
+        'calculate_total'
+        >>> extract_chunk_name("class UserManager:")
+        'UserManager'
+        >>> extract_chunk_name("# Comment about users")
+        'users'
+        >>> extract_chunk_name("import pandas as pd")
+        'pandas'
+    """
+    import re
+
+    # Skip common keywords that aren't meaningful as chunk labels
+    skip_words = {
+        "def",
+        "class",
+        "function",
+        "const",
+        "let",
+        "var",
+        "import",
+        "from",
+        "return",
+        "if",
+        "else",
+        "elif",
+        "for",
+        "while",
+        "try",
+        "except",
+        "finally",
+        "with",
+        "as",
+        "async",
+        "await",
+        "yield",
+        "self",
+        "this",
+        "true",
+        "false",
+        "none",
+        "null",
+        "undefined",
+        "public",
+        "private",
+        "protected",
+        "static",
+        "export",
+        "default",
+    }
+
+    # Find all words (alphanumeric + underscore, at least 2 chars)
+    words = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]+\b", content)
+
+    for word in words:
+        if word.lower() not in skip_words:
+            return word
+
+    return fallback
+
+
 def get_subproject_color(subproject_name: str, index: int) -> str:
     """Get a consistent color for a subproject.
 
@@ -226,6 +298,13 @@ async def build_graph_data(
     console.print(f"[green]✓[/green] Loaded {len(dir_index.directories)} directories")
     for dir_path_str, directory in dir_index.directories.items():
         dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"
+
+        # Compute parent directory ID (convert Path to string for JSON serialization)
+        parent_dir_id = None
+        parent_path_str = str(directory.parent_path) if directory.parent_path else None
+        if parent_path_str:
+            parent_dir_id = f"dir_{hash(parent_path_str) & 0xFFFFFFFF:08x}"
+
         dir_nodes[dir_path_str] = {
             "id": dir_id,
             "name": directory.name,
@@ -236,6 +315,8 @@ async def build_graph_data(
             "complexity": 0,
             "depth": directory.depth,
             "dir_path": dir_path_str,
+            "parent_id": parent_dir_id,  # Link to parent directory
+            "parent_path": parent_path_str,  # String for JSON serialization
             "file_count": directory.file_count,
             "subdirectory_count": directory.subdirectory_count,
             "total_chunks": directory.total_chunks,
@@ -245,6 +326,7 @@ async def build_graph_data(
         }
 
     # Create file nodes from chunks
+    # First pass: create file node entries
    for chunk in chunks:
         file_path_str = str(chunk.file_path)
         file_path = Path(file_path_str)
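A side note on the `dir_{hash(...) & 0xFFFFFFFF:08x}` IDs introduced two hunks above: Python's built-in `hash()` for strings is randomized per interpreter process (PYTHONHASHSEED), so these IDs are consistent within one graph build but differ across runs. If run-stable IDs ever mattered (e.g., for cached exports), a digest-based variant would behave identically everywhere. A minimal sketch, not part of this diff; `stable_dir_id` is a hypothetical name:

```python
import hashlib

def stable_dir_id(dir_path_str: str) -> str:
    """Run-stable 32-bit directory ID; unlike built-in hash(),
    a blake2b digest does not vary with PYTHONHASHSEED."""
    digest = hashlib.blake2b(dir_path_str.encode("utf-8"), digest_size=4).digest()
    return f"dir_{int.from_bytes(digest, 'big'):08x}"
```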
@@ -277,10 +359,17 @@ async def build_graph_data(
             "end_line": 0,
             "complexity": 0,
             "depth": len(file_path.parts) - 1,
-            "parent_dir_id": parent_dir_id,
-            "parent_dir_path": parent_dir_str,
+            "parent_id": parent_dir_id,  # Consistent with directory nodes
+            "parent_path": parent_dir_str,
+            "chunk_count": 0,  # Will be computed below
         }
 
+    # Second pass: count chunks per file (pre-compute for consistent sizing)
+    for chunk in chunks:
+        file_path_str = str(chunk.file_path)
+        if file_path_str in file_nodes:
+            file_nodes[file_path_str]["chunk_count"] += 1
+
     # Add directory nodes to graph
     for dir_node in dir_nodes.values():
         nodes.append(dir_node)
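The "second pass" above is just a per-file frequency count. An equivalent formulation with `collections.Counter` makes the intent explicit; this is only an illustration with hypothetical stand-in data, not code from the package:

```python
from collections import Counter

# Stand-ins for the chunk list and file_nodes dict built above (hypothetical data).
chunk_paths = ["src/a.py", "src/a.py", "src/b.py"]
file_nodes = {"src/a.py": {"chunk_count": 0}, "src/b.py": {"chunk_count": 0}}

# Equivalent to the second-pass loop: tally chunks per file path once,
# then copy the counts onto the file nodes.
counts = Counter(chunk_paths)
for path, node in file_nodes.items():
    node["chunk_count"] = counts.get(path, 0)

assert file_nodes["src/a.py"]["chunk_count"] == 2
```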
@@ -289,237 +378,84 @@ async def build_graph_data(
     for file_node in file_nodes.values():
         nodes.append(file_node)
 
-    # Compute semantic relationships for code chunks
-    console.print("[cyan]Computing semantic relationships...[/cyan]")
-    code_chunks = [c for c in chunks if c.chunk_type in ["function", "method", "class"]]
-    semantic_links = []
-
-    # Pre-compute top 5 semantic relationships for each code chunk
-    for i, chunk in enumerate(code_chunks):
-        if i % 20 == 0:  # Progress indicator every 20 chunks
-            console.print(f"[dim]Processed {i}/{len(code_chunks)} chunks[/dim]")
-
-        try:
-            # Search for similar chunks using the chunk's content
-            similar_results = await database.search(
-                query=chunk.content[:500],  # Use first 500 chars for query
-                limit=6,  # Get 6 (exclude self = 5)
-                similarity_threshold=0.3,  # Lower threshold to catch more relationships
-            )
-
-            # Filter out self and create semantic links
-            for result in similar_results:
-                # Construct target chunk_id from file_path and line numbers
-                target_chunk = next(
-                    (
-                        c
-                        for c in chunks
-                        if str(c.file_path) == str(result.file_path)
-                        and c.start_line == result.start_line
-                        and c.end_line == result.end_line
-                    ),
-                    None,
-                )
-
-                if not target_chunk:
-                    continue
-
-                target_chunk_id = target_chunk.chunk_id or target_chunk.id
-
-                # Skip self-references
-                if target_chunk_id == (chunk.chunk_id or chunk.id):
-                    continue
-
-                # Add semantic link with similarity score
-                if result.similarity_score >= 0.2:
-                    semantic_links.append(
-                        {
-                            "source": chunk.chunk_id or chunk.id,
-                            "target": target_chunk_id,
-                            "type": "semantic",
-                            "similarity": result.similarity_score,
-                        }
-                    )
-
-                    # Only keep top 5
-                    if (
-                        len(
-                            [
-                                link
-                                for link in semantic_links
-                                if link["source"] == (chunk.chunk_id or chunk.id)
-                            ]
-                        )
-                        >= 5
-                    ):
-                        break
-
-        except Exception as e:
-            logger.debug(
-                f"Failed to compute semantic relationships for {chunk.chunk_id}: {e}"
+    # Link directories to their parent directories
+    for dir_node in dir_nodes.values():
+        if dir_node.get("parent_id"):
+            links.append(
+                {
+                    "source": dir_node["parent_id"],
+                    "target": dir_node["id"],
+                    "type": "dir_containment",
+                }
             )
-            continue
 
+    # Skip ALL relationship computation at startup for instant loading
+    # Relationships are lazy-loaded on-demand via /api/relationships/{chunk_id}
+    # This avoids the expensive 5+ minute semantic computation
+    caller_map: dict = {}  # Empty - callers lazy-loaded via API
     console.print(
-        f"[green]✓[/green] Computed {len(semantic_links)} semantic relationships"
+        "[green]✓[/green] Skipping relationship computation (lazy-loaded on node expand)"
     )
 
-    def extract_function_calls(code: str) -> set[str]:
-        """Extract actual function calls from Python code using AST.
-
-        Returns set of function names that are actually called (not just mentioned).
-        Avoids false positives from comments, docstrings, and string literals.
-
-        Args:
-            code: Python source code to analyze
-
-        Returns:
-            Set of function names that are actually called in the code
-        """
-        import ast
+    # Add chunk nodes
+    for chunk in chunks:
+        chunk_id = chunk.chunk_id or chunk.id
 
-        calls = set()
-        try:
-            tree = ast.parse(code)
-            for node in ast.walk(tree):
-                if isinstance(node, ast.Call):
-                    # Handle direct calls: foo()
-                    if isinstance(node.func, ast.Name):
-                        calls.add(node.func.id)
-                    # Handle method calls: obj.foo() - extract 'foo'
-                    elif isinstance(node.func, ast.Attribute):
-                        calls.add(node.func.attr)
-            return calls
-        except SyntaxError:
-            # If code can't be parsed (incomplete, etc.), fall back to empty set
-            # This is safer than false positives from naive substring matching
-            return set()
-
-    # Compute external caller relationships
-    console.print("[cyan]Computing external caller relationships...[/cyan]")
-    import time
-
-    start_time = time.time()
-    caller_map = {}  # Map chunk_id -> list of caller info
-
-    logger.info(f"Processing {len(code_chunks)} code chunks for external callers...")
-    for chunk_idx, chunk in enumerate(code_chunks):
-        if chunk_idx % 50 == 0:  # Progress every 50 chunks
-            elapsed = time.time() - start_time
-            logger.info(
-                f"Progress: {chunk_idx}/{len(code_chunks)} chunks ({elapsed:.1f}s elapsed)"
+        # Generate meaningful chunk name
+        chunk_name = chunk.function_name or chunk.class_name
+        if not chunk_name:
+            # Extract meaningful name from content
+            chunk_name = extract_chunk_name(
+                chunk.content, fallback=f"chunk_{chunk.start_line}"
             )
-            console.print(
-                f"[dim]Progress: {chunk_idx}/{len(code_chunks)} chunks ({elapsed:.1f}s)[/dim]"
+            logger.debug(
+                f"Generated chunk name '{chunk_name}' for {chunk.chunk_type} at {chunk.file_path}:{chunk.start_line}"
             )
-        chunk_id = chunk.chunk_id or chunk.id
-        file_path = str(chunk.file_path)
-        function_name = chunk.function_name or chunk.class_name
-
-        if not function_name:
-            continue
-
-        # Search for other chunks that reference this function/class name
-        other_chunks_count = 0
-        for other_chunk in chunks:
-            other_chunks_count += 1
-            if chunk_idx % 50 == 0 and other_chunks_count % 500 == 0:  # Inner progress
-                logger.debug(
-                    f"  Chunk {chunk_idx}: Scanning {other_chunks_count}/{len(chunks)} chunks"
-                )
-            other_file_path = str(other_chunk.file_path)
-
-            # Only track EXTERNAL callers (different file)
-            if other_file_path == file_path:
-                continue
-
-            # Extract actual function calls using AST (avoids false positives)
-            actual_calls = extract_function_calls(other_chunk.content)
-
-            # Check if this function is actually called (not just mentioned in comments)
-            if function_name in actual_calls:
-                other_chunk_id = other_chunk.chunk_id or other_chunk.id
-                other_name = (
-                    other_chunk.function_name
-                    or other_chunk.class_name
-                    or f"L{other_chunk.start_line}"
-                )
-
-                # Skip __init__ functions as callers - they are noise in "called by" lists
-                # (every class calls __init__ when constructing objects)
-                if other_name == "__init__":
-                    continue
-
-                if chunk_id not in caller_map:
-                    caller_map[chunk_id] = []
-
-                # Store caller information
-                caller_map[chunk_id].append(
-                    {
-                        "file": other_file_path,
-                        "chunk_id": other_chunk_id,
-                        "name": other_name,
-                        "type": other_chunk.chunk_type,
-                    }
-                )
-
-                logger.debug(
-                    f"Found actual call: {other_name} ({other_file_path}) -> "
-                    f"{function_name} ({file_path})"
-                )
-
-    # Count total caller relationships
-    total_callers = sum(len(callers) for callers in caller_map.values())
-    elapsed_total = time.time() - start_time
-    logger.info(f"Completed external caller computation in {elapsed_total:.1f}s")
-    console.print(
-        f"[green]✓[/green] Found {total_callers} external caller relationships ({elapsed_total:.1f}s)"
-    )
 
-    # Detect circular dependencies in caller relationships
-    console.print("[cyan]Detecting circular dependencies...[/cyan]")
-    cycles = detect_cycles(chunks, caller_map)
-
-    # Mark cycle links
-    cycle_links = []
-    if cycles:
-        console.print(f"[yellow]⚠ Found {len(cycles)} circular dependencies[/yellow]")
-
-        # For each cycle, create links marking the cycle
-        for cycle in cycles:
-            # Create links for the cycle path: A → B → C → A
-            for i in range(len(cycle)):
-                source = cycle[i]
-                target = cycle[(i + 1) % len(cycle)]  # Wrap around to form cycle
-                cycle_links.append(
-                    {
-                        "source": source,
-                        "target": target,
-                        "type": "caller",
-                        "is_cycle": True,
-                    }
-                )
-    else:
-        console.print("[green]✓[/green] No circular dependencies detected")
+        # Determine parent_id: use parent_chunk_id if exists, else use file node ID
+        file_path_str = str(chunk.file_path)
+        parent_id = chunk.parent_chunk_id
+        if not parent_id and file_path_str in file_nodes:
+            # Top-level chunk: set parent to file node for proper tree structure
+            parent_id = file_nodes[file_path_str]["id"]
 
-    # Add chunk nodes
-    for chunk in chunks:
-        chunk_id = chunk.chunk_id or chunk.id
         node = {
             "id": chunk_id,
-            "name": chunk.function_name or chunk.class_name or f"L{chunk.start_line}",
+            "name": chunk_name,
             "type": chunk.chunk_type,
-            "file_path": str(chunk.file_path),
+            "file_path": file_path_str,
             "start_line": chunk.start_line,
             "end_line": chunk.end_line,
             "complexity": chunk.complexity_score,
-            "parent_id": chunk.parent_chunk_id,
+            "parent_id": parent_id,  # Now properly set for all chunks
             "depth": chunk.chunk_depth,
             "content": chunk.content,  # Add content for code viewer
             "docstring": chunk.docstring,
             "language": chunk.language,
         }
 
+        # Add structural analysis metrics if available
+        if (
+            hasattr(chunk, "cognitive_complexity")
+            and chunk.cognitive_complexity is not None
+        ):
+            node["cognitive_complexity"] = chunk.cognitive_complexity
+        if (
+            hasattr(chunk, "cyclomatic_complexity")
+            and chunk.cyclomatic_complexity is not None
+        ):
+            node["cyclomatic_complexity"] = chunk.cyclomatic_complexity
+        if hasattr(chunk, "complexity_grade") and chunk.complexity_grade is not None:
+            node["complexity_grade"] = chunk.complexity_grade
+        if hasattr(chunk, "code_smells") and chunk.code_smells:
+            node["smells"] = chunk.code_smells
+        if hasattr(chunk, "smell_count") and chunk.smell_count is not None:
+            node["smell_count"] = chunk.smell_count
+        if hasattr(chunk, "quality_score") and chunk.quality_score is not None:
+            node["quality_score"] = chunk.quality_score
+        if hasattr(chunk, "lines_of_code") and chunk.lines_of_code is not None:
+            node["lines_of_code"] = chunk.lines_of_code
+
         # Add caller information if available
         if chunk_id in caller_map:
            node["callers"] = caller_map[chunk_id]
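Per the comments in this hunk, semantic and caller edges are now fetched from `/api/relationships/{chunk_id}` when a node is expanded, instead of being precomputed for every chunk at startup. A minimal client-side sketch of that flow; only the endpoint path appears in this diff, so the JSON response shape here is an assumption:

```python
import json
import urllib.request

def fetch_relationships(base_url: str, chunk_id: str) -> list[dict]:
    """Lazy-load the edges for one expanded node from the visualization
    server. Assumes the endpoint returns a JSON array of link objects
    like {"source": ..., "target": ..., "type": ...} (not shown in the diff)."""
    url = f"{base_url}/api/relationships/{chunk_id}"
    with urllib.request.urlopen(url) as resp:
        return json.loads(resp.read().decode("utf-8"))

# e.g. links.extend(fetch_relationships("http://127.0.0.1:8080", node_id))
```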
@@ -532,20 +468,8 @@ async def build_graph_data(
         nodes.append(node)
         chunk_id_map[node["id"]] = len(nodes) - 1
 
-    # Link directories to their parent directories (hierarchical structure)
-    for dir_path_str, dir_info in dir_index.directories.items():
-        if dir_info.parent_path:
-            parent_path_str = str(dir_info.parent_path)
-            if parent_path_str in dir_nodes:
-                parent_dir_id = f"dir_{hash(parent_path_str) & 0xFFFFFFFF:08x}"
-                child_dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"
-                links.append(
-                    {
-                        "source": parent_dir_id,
-                        "target": child_dir_id,
-                        "type": "dir_hierarchy",
-                    }
-                )
+    # NOTE: Directory parent→child links already created above via dir_containment
+    # (removed duplicate dir_hierarchy link creation that caused duplicate paths)
 
     # Link directories to subprojects in monorepos (simple flat structure)
     if subprojects:
@@ -563,10 +487,10 @@ async def build_graph_data(
 
     # Link files to their parent directories
     for _file_path_str, file_node in file_nodes.items():
-        if file_node.get("parent_dir_id"):
+        if file_node.get("parent_id"):
             links.append(
                 {
-                    "source": file_node["parent_dir_id"],
+                    "source": file_node["parent_id"],
                     "target": file_node["id"],
                     "type": "dir_containment",
                 }
@@ -593,23 +517,22 @@ async def build_graph_data(
                 {
                     "source": f"subproject_{chunk.subproject_name}",
                     "target": chunk_id,
+                    "type": "subproject_containment",
                 }
             )
 
-        # Link to parent chunk
+        # Link to parent chunk (class -> method hierarchy)
         if chunk.parent_chunk_id and chunk.parent_chunk_id in chunk_id_map:
             links.append(
                 {
                     "source": chunk.parent_chunk_id,
                     "target": chunk_id,
+                    "type": "chunk_hierarchy",  # Explicitly mark chunk parent-child relationships
                 }
             )
 
-    # Add semantic relationship links
-    links.extend(semantic_links)
-
-    # Add cycle links
-    links.extend(cycle_links)
+    # Semantic and caller relationships are lazy-loaded via /api/relationships/{chunk_id}
+    # No relationship links at startup for instant loading
 
     # Parse inter-project dependencies for monorepos
     if subprojects:
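With `subproject_containment` and `chunk_hierarchy` added here, every link the builder emits now carries an explicit `type`, which is what the tree-mode filter in `apply_state` (last hunk below) keys on. A small self-contained illustration of that type-based filtering, using hypothetical link dicts:

```python
# Edge types kept in tree modes, per the apply_state hunk at the end of this diff.
TREE_EDGE_TYPES = {"dir_containment", "dir_hierarchy", "file_containment"}

links = [
    {"source": "d1", "target": "d2", "type": "dir_containment"},
    {"source": "c1", "target": "c2", "type": "chunk_hierarchy"},        # dropped in tree modes
    {"source": "s1", "target": "c1", "type": "subproject_containment"},  # dropped in tree modes
]
tree_links = [link for link in links if link.get("type") in TREE_EDGE_TYPES]
assert [link["type"] for link in tree_links] == ["dir_containment"]
```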
@@ -626,6 +549,10 @@ async def build_graph_data(
     # Get stats
     stats = await database.get_stats()
 
+    # Load trend data for time series visualization
+    trend_tracker = TrendTracker(project_manager.project_root)
+    trend_summary = trend_tracker.get_trend_summary(days=90)  # Last 90 days
+
     # Build final graph data
     graph_data = {
         "nodes": nodes,
@@ -637,6 +564,7 @@ async def build_graph_data(
             "is_monorepo": len(subprojects) > 0,
             "subprojects": list(subprojects.keys()) if subprojects else [],
         },
+        "trends": trend_summary,  # Include trend data for visualization
     }
 
     return graph_data
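`build_graph_data` now returns a `trends` key alongside `nodes` and the metadata block. Consumers can treat it as optional for backward compatibility; a hedged sketch of defensive consumption (the inner structure of `trend_summary` is not shown in this diff, and the assumption that the edge list lives under a `links` key is not visible in the truncated dict literal above):

```python
def describe_graph(graph_data: dict) -> str:
    """Tiny consumer of the build_graph_data payload; tolerates payloads
    produced before the 'trends' key existed."""
    trends = graph_data.get("trends")  # absent or None in older payloads
    suffix = "with trend data" if trends else "without trend data"
    return f"{len(graph_data['nodes'])} nodes, {len(graph_data['links'])} links, {suffix}"

print(describe_graph({"nodes": [], "links": [], "trends": None}))
```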
@@ -703,7 +631,12 @@ def apply_state(graph_data: dict, state: VisualizationState) -> dict:
             filtered_links.append(link)
         elif state.view_mode.value in ("tree_root", "tree_expanded"):
             # In tree modes, show containment edges only
-            if link.get("type") in ("dir_containment", "dir_hierarchy"):
+            # Must include file_containment to link code chunks to their parent files
+            if link.get("type") in (
+                "dir_containment",
+                "dir_hierarchy",
+                "file_containment",
+            ):
                 filtered_links.append(link)
 
     return {