PyPI - mcp-vector-search - Versions diffs - 0.0.3__py3-none-any.whl - Mend

mcp-vector-search 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (35)

mcp_vector_search/__init__.py +9 -0
mcp_vector_search/cli/__init__.py +1 -0
mcp_vector_search/cli/commands/__init__.py +1 -0
mcp_vector_search/cli/commands/config.py +303 -0
mcp_vector_search/cli/commands/index.py +304 -0
mcp_vector_search/cli/commands/init.py +212 -0
mcp_vector_search/cli/commands/search.py +395 -0
mcp_vector_search/cli/commands/status.py +340 -0
mcp_vector_search/cli/commands/watch.py +288 -0
mcp_vector_search/cli/main.py +117 -0
mcp_vector_search/cli/output.py +242 -0
mcp_vector_search/config/__init__.py +1 -0
mcp_vector_search/config/defaults.py +175 -0
mcp_vector_search/config/settings.py +108 -0
mcp_vector_search/core/__init__.py +1 -0
mcp_vector_search/core/database.py +431 -0
mcp_vector_search/core/embeddings.py +250 -0
mcp_vector_search/core/exceptions.py +66 -0
mcp_vector_search/core/indexer.py +310 -0
mcp_vector_search/core/models.py +174 -0
mcp_vector_search/core/project.py +304 -0
mcp_vector_search/core/search.py +324 -0
mcp_vector_search/core/watcher.py +320 -0
mcp_vector_search/mcp/__init__.py +1 -0
mcp_vector_search/parsers/__init__.py +1 -0
mcp_vector_search/parsers/base.py +180 -0
mcp_vector_search/parsers/javascript.py +238 -0
mcp_vector_search/parsers/python.py +407 -0
mcp_vector_search/parsers/registry.py +187 -0
mcp_vector_search/py.typed +1 -0
mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0

mcp_vector_search/cli/commands/init.py ADDED Viewed

@@ -0,0 +1,212 @@
+"""Init command for MCP Vector Search CLI."""
+from pathlib import Path
+from typing import List, Optional
+import typer
+from loguru import logger
+from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS
+from ...core.exceptions import ProjectInitializationError
+from ...core.project import ProjectManager
+from ..output import (
+    confirm_action,
+    console,
+    print_error,
+    print_info,
+    print_project_info,
+    print_success,
+)
+# Typer sub-application that groups the init commands defined below
+# (the default command plus the "check" and "models" subcommands).
+init_app = typer.Typer(help="Initialize project for semantic search")
+@init_app.command()
+def main(
+    ctx: typer.Context,
+    config_file: Optional[Path] = typer.Option(
+        None,
+        "--config",
+        "-c",
+        help="Configuration file to use",
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+        readable=True,
+    ),
+    extensions: Optional[str] = typer.Option(
+        None,
+        "--extensions",
+        "-e",
+        help="Comma-separated list of file extensions to index (e.g., '.py,.js,.ts')",
+    ),
+    embedding_model: str = typer.Option(
+        DEFAULT_EMBEDDING_MODELS["code"],
+        "--embedding-model",
+        "-m",
+        help="Embedding model to use for semantic search",
+    ),
+    similarity_threshold: float = typer.Option(
+        0.75,
+        "--similarity-threshold",
+        "-s",
+        help="Similarity threshold for search results (0.0 to 1.0)",
+        min=0.0,
+        max=1.0,
+    ),
+    force: bool = typer.Option(
+        False,
+        "--force",
+        "-f",
+        help="Force re-initialization if project is already initialized",
+    ),
+    auto_index: bool = typer.Option(
+        False,
+        "--auto-index",
+        help="Automatically start indexing after initialization",
+    ),
+) -> None:
+    """Initialize a project for semantic code search.
+    This command sets up the necessary configuration and directory structure
+    for MCP Vector Search in your project. It will:
+    - Create a .mcp-vector-search directory for storing the index and configuration
+    - Detect programming languages in your project
+    - Set up default configuration based on your project structure
+    - Optionally start indexing your codebase
+    Examples:
+        mcp-vector-search init
+        mcp-vector-search init --extensions .py,.js,.ts --auto-index
+        mcp-vector-search init --embedding-model microsoft/unixcoder-base --force
+    """
+    try:
+        # Get project root from context or auto-detect
+        project_root = ctx.obj.get("project_root")
+        if not project_root:
+            project_root = Path.cwd()
+        print_info(f"Initializing project at: {project_root}")
+        # Create project manager
+        project_manager = ProjectManager(project_root)
+        # Check if already initialized
+        if project_manager.is_initialized() and not force:
+            print_error("Project is already initialized")
+            print_info("Use --force to re-initialize or run 'mcp-vector-search status' to see current configuration")
+            raise typer.Exit(1)
+        # Parse file extensions
+        file_extensions = None
+        if extensions:
+            file_extensions = [ext.strip() for ext in extensions.split(",")]
+            # Ensure extensions start with dot
+            file_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in file_extensions]
+        else:
+            file_extensions = DEFAULT_FILE_EXTENSIONS
+        # Show what will be initialized
+        console.print("\n[bold blue]Initialization Settings:[/bold blue]")
+        console.print(f"  Project Root: {project_root}")
+        console.print(f"  File Extensions: {', '.join(file_extensions)}")
+        console.print(f"  Embedding Model: {embedding_model}")
+        console.print(f"  Similarity Threshold: {similarity_threshold}")
+        # Confirm initialization
+        if not force and not confirm_action("\nProceed with initialization?", default=True):
+            print_info("Initialization cancelled")
+            raise typer.Exit(0)
+        # Initialize project
+        console.print("\n[bold]Initializing project...[/bold]")
+        config = project_manager.initialize(
+            file_extensions=file_extensions,
+            embedding_model=embedding_model,
+            similarity_threshold=similarity_threshold,
+            force=force,
+        )
+        print_success("Project initialized successfully!")
+        # Show project information
+        console.print()
+        project_info = project_manager.get_project_info()
+        print_project_info(project_info)
+        # Offer to start indexing
+        if auto_index or confirm_action("\nStart indexing your codebase now?", default=True):
+            console.print("\n[bold]Starting indexing...[/bold]")
+            # Import and run indexing (avoid circular imports)
+            import asyncio
+            from .index import run_indexing
+            try:
+                asyncio.run(run_indexing(
+                    project_root=project_root,
+                    force_reindex=False,
+                    show_progress=True,
+                ))
+                print_success("Indexing completed!")
+            except Exception as e:
+                print_error(f"Indexing failed: {e}")
+                print_info("You can run 'mcp-vector-search index' later to index your codebase")
+        else:
+            print_info("Run 'mcp-vector-search index' to index your codebase")
+        # Show next steps
+        console.print("\n[bold green]Next Steps:[/bold green]")
+        console.print("  1. Run [code]mcp-vector-search index[/code] to index your codebase (if not done)")
+        console.print("  2. Run [code]mcp-vector-search search 'your query'[/code] to search your code")
+        console.print("  3. Run [code]mcp-vector-search status[/code] to check indexing status")
+    except ProjectInitializationError as e:
+        print_error(f"Initialization failed: {e}")
+        raise typer.Exit(1)
+    except Exception as e:
+        logger.error(f"Unexpected error during initialization: {e}")
+        print_error(f"Unexpected error: {e}")
+        raise typer.Exit(1)
+@init_app.command("check")
+def check_initialization(ctx: typer.Context) -> None:
+    """Check if the current project is initialized for MCP Vector Search."""
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        project_manager = ProjectManager(project_root)
+        if project_manager.is_initialized():
+            print_success(f"Project is initialized at {project_root}")
+            # Show project info
+            project_info = project_manager.get_project_info()
+            print_project_info(project_info)
+        else:
+            print_error(f"Project is not initialized at {project_root}")
+            print_info("Run 'mcp-vector-search init' to initialize the project")
+            raise typer.Exit(1)
+    except Exception as e:
+        logger.error(f"Error checking initialization: {e}")
+        print_error(f"Error: {e}")
+        raise typer.Exit(1)
+@init_app.command("models")
+def list_embedding_models() -> None:
+    """List available embedding models."""
+    console.print("[bold blue]Available Embedding Models:[/bold blue]\n")
+    for category, model in DEFAULT_EMBEDDING_MODELS.items():
+        console.print(f"[cyan]{category.title()}:[/cyan] {model}")
+    console.print("\n[dim]You can also use any model from Hugging Face that's compatible with sentence-transformers[/dim]")
+# Run the init sub-app directly when this module is executed as a script.
+if __name__ == "__main__":
+    init_app()

mcp_vector_search/cli/commands/search.py ADDED Viewed

@@ -0,0 +1,395 @@
+"""Search command for MCP Vector Search CLI."""
+import asyncio
+from pathlib import Path
+from typing import List, Optional
+import typer
+from loguru import logger
+from ...core.database import ChromaVectorDatabase
+from ...core.embeddings import create_embedding_function
+from ...core.exceptions import ProjectNotFoundError
+from ...core.project import ProjectManager
+from ...core.search import SemanticSearchEngine
+from ..output import (
+    console,
+    print_error,
+    print_info,
+    print_search_results,
+    print_warning,
+)
+# Typer sub-application that groups the search commands defined below
+# (the default command plus the "similar" and "context" subcommands).
+search_app = typer.Typer(help="Search code semantically")
+@search_app.command()
+def main(
+    ctx: typer.Context,
+    query: str = typer.Argument(..., help="Search query"),
+    limit: int = typer.Option(
+        10,
+        "--limit",
+        "-l",
+        help="Maximum number of results to return",
+        min=1,
+        max=100,
+    ),
+    files: Optional[str] = typer.Option(
+        None,
+        "--files",
+        "-f",
+        help="Filter by file patterns (e.g., '*.py' or 'src/*.js')",
+    ),
+    language: Optional[str] = typer.Option(
+        None,
+        "--language",
+        help="Filter by programming language",
+    ),
+    function_name: Optional[str] = typer.Option(
+        None,
+        "--function",
+        help="Filter by function name",
+    ),
+    class_name: Optional[str] = typer.Option(
+        None,
+        "--class",
+        help="Filter by class name",
+    ),
+    similarity_threshold: Optional[float] = typer.Option(
+        None,
+        "--threshold",
+        "-t",
+        help="Minimum similarity threshold (0.0 to 1.0)",
+        min=0.0,
+        max=1.0,
+    ),
+    no_content: bool = typer.Option(
+        False,
+        "--no-content",
+        help="Don't show code content in results",
+    ),
+    json_output: bool = typer.Option(
+        False,
+        "--json",
+        help="Output results in JSON format",
+    ),
+) -> None:
+    """Search your codebase semantically.
+    This command performs semantic search across your indexed codebase,
+    finding code that is conceptually similar to your query even if it
+    doesn't contain the exact keywords.
+    Examples:
+        mcp-vector-search search "authentication middleware"
+        mcp-vector-search search "database connection" --language python
+        mcp-vector-search search "error handling" --files "*.js" --limit 5
+        mcp-vector-search search "user validation" --function validate --json
+    """
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        asyncio.run(run_search(
+            project_root=project_root,
+            query=query,
+            limit=limit,
+            files=files,
+            language=language,
+            function_name=function_name,
+            class_name=class_name,
+            similarity_threshold=similarity_threshold,
+            show_content=not no_content,
+            json_output=json_output,
+        ))
+    except Exception as e:
+        logger.error(f"Search failed: {e}")
+        print_error(f"Search failed: {e}")
+        raise typer.Exit(1)
+async def run_search(
+    project_root: Path,
+    query: str,
+    limit: int = 10,
+    files: Optional[str] = None,
+    language: Optional[str] = None,
+    function_name: Optional[str] = None,
+    class_name: Optional[str] = None,
+    similarity_threshold: Optional[float] = None,
+    show_content: bool = True,
+    json_output: bool = False,
+) -> None:
+    """Run semantic search."""
+    # Load project configuration
+    project_manager = ProjectManager(project_root)
+    if not project_manager.is_initialized():
+        raise ProjectNotFoundError(
+            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+        )
+    config = project_manager.load_config()
+    # Setup database and search engine
+    embedding_function, _ = create_embedding_function(config.embedding_model)
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+    search_engine = SemanticSearchEngine(
+        database=database,
+        project_root=project_root,
+        similarity_threshold=similarity_threshold or config.similarity_threshold,
+    )
+    # Build filters
+    filters = {}
+    if language:
+        filters["language"] = language
+    if function_name:
+        filters["function_name"] = function_name
+    if class_name:
+        filters["class_name"] = class_name
+    if files:
+        # Simple file pattern matching (could be enhanced)
+        filters["file_path"] = files
+    try:
+        async with database:
+            results = await search_engine.search(
+                query=query,
+                limit=limit,
+                filters=filters if filters else None,
+                similarity_threshold=similarity_threshold,
+                include_context=show_content,
+            )
+            if json_output:
+                from ..output import print_json
+                results_data = [result.to_dict() for result in results]
+                print_json(results_data, title="Search Results")
+            else:
+                print_search_results(
+                    results=results,
+                    query=query,
+                    show_content=show_content,
+                )
+    except Exception as e:
+        logger.error(f"Search execution failed: {e}")
+        raise
+@search_app.command("similar")
+def search_similar(
+    ctx: typer.Context,
+    file_path: Path = typer.Argument(
+        ...,
+        help="Reference file path",
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+        readable=True,
+    ),
+    function_name: Optional[str] = typer.Option(
+        None,
+        "--function",
+        "-f",
+        help="Specific function name to find similar code for",
+    ),
+    limit: int = typer.Option(
+        10,
+        "--limit",
+        "-l",
+        help="Maximum number of results",
+        min=1,
+        max=100,
+    ),
+    similarity_threshold: Optional[float] = typer.Option(
+        None,
+        "--threshold",
+        "-t",
+        help="Minimum similarity threshold",
+        min=0.0,
+        max=1.0,
+    ),
+    json_output: bool = typer.Option(
+        False,
+        "--json",
+        help="Output results in JSON format",
+    ),
+) -> None:
+    """Find code similar to a specific file or function.
+    Examples:
+        mcp-vector-search search similar src/auth.py
+        mcp-vector-search search similar src/utils.py --function validate_email
+    """
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        asyncio.run(run_similar_search(
+            project_root=project_root,
+            file_path=file_path,
+            function_name=function_name,
+            limit=limit,
+            similarity_threshold=similarity_threshold,
+            json_output=json_output,
+        ))
+    except Exception as e:
+        logger.error(f"Similar search failed: {e}")
+        print_error(f"Similar search failed: {e}")
+        raise typer.Exit(1)
+async def run_similar_search(
+    project_root: Path,
+    file_path: Path,
+    function_name: Optional[str] = None,
+    limit: int = 10,
+    similarity_threshold: Optional[float] = None,
+    json_output: bool = False,
+) -> None:
+    """Run similar code search."""
+    project_manager = ProjectManager(project_root)
+    config = project_manager.load_config()
+    embedding_function, _ = create_embedding_function(config.embedding_model)
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+    search_engine = SemanticSearchEngine(
+        database=database,
+        project_root=project_root,
+        similarity_threshold=similarity_threshold or config.similarity_threshold,
+    )
+    async with database:
+        results = await search_engine.search_similar(
+            file_path=file_path,
+            function_name=function_name,
+            limit=limit,
+            similarity_threshold=similarity_threshold,
+        )
+        if json_output:
+            from ..output import print_json
+            results_data = [result.to_dict() for result in results]
+            print_json(results_data, title="Similar Code Results")
+        else:
+            query_desc = f"{file_path}"
+            if function_name:
+                query_desc += f" → {function_name}()"
+            print_search_results(
+                results=results,
+                query=f"Similar to: {query_desc}",
+                show_content=True,
+            )
+@search_app.command("context")
+def search_context(
+    ctx: typer.Context,
+    description: str = typer.Argument(..., help="Context description"),
+    focus: Optional[str] = typer.Option(
+        None,
+        "--focus",
+        help="Comma-separated focus areas (e.g., 'security,authentication')",
+    ),
+    limit: int = typer.Option(
+        10,
+        "--limit",
+        "-l",
+        help="Maximum number of results",
+        min=1,
+        max=100,
+    ),
+    json_output: bool = typer.Option(
+        False,
+        "--json",
+        help="Output results in JSON format",
+    ),
+) -> None:
+    """Search for code based on contextual description.
+    Examples:
+        mcp-vector-search search context "implement rate limiting"
+        mcp-vector-search search context "user authentication" --focus security,middleware
+    """
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        focus_areas = None
+        if focus:
+            focus_areas = [area.strip() for area in focus.split(",")]
+        asyncio.run(run_context_search(
+            project_root=project_root,
+            description=description,
+            focus_areas=focus_areas,
+            limit=limit,
+            json_output=json_output,
+        ))
+    except Exception as e:
+        logger.error(f"Context search failed: {e}")
+        print_error(f"Context search failed: {e}")
+        raise typer.Exit(1)
+async def run_context_search(
+    project_root: Path,
+    description: str,
+    focus_areas: Optional[List[str]] = None,
+    limit: int = 10,
+    json_output: bool = False,
+) -> None:
+    """Run contextual search."""
+    project_manager = ProjectManager(project_root)
+    config = project_manager.load_config()
+    embedding_function, _ = create_embedding_function(config.embedding_model)
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+    search_engine = SemanticSearchEngine(
+        database=database,
+        project_root=project_root,
+        similarity_threshold=config.similarity_threshold,
+    )
+    async with database:
+        results = await search_engine.search_by_context(
+            context_description=description,
+            focus_areas=focus_areas,
+            limit=limit,
+        )
+        if json_output:
+            from ..output import print_json
+            results_data = [result.to_dict() for result in results]
+            print_json(results_data, title="Context Search Results")
+        else:
+            query_desc = description
+            if focus_areas:
+                query_desc += f" (focus: {', '.join(focus_areas)})"
+            print_search_results(
+                results=results,
+                query=query_desc,
+                show_content=True,
+            )
+# Run the search sub-app directly when this module is executed as a script.
+if __name__ == "__main__":
+    search_app()