PyPI - mcp-vector-search - Versions diffs - 0.0.3__py3-none-any.whl - Mend

mcp-vector-search 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (35) hide show

mcp_vector_search/__init__.py +9 -0
mcp_vector_search/cli/__init__.py +1 -0
mcp_vector_search/cli/commands/__init__.py +1 -0
mcp_vector_search/cli/commands/config.py +303 -0
mcp_vector_search/cli/commands/index.py +304 -0
mcp_vector_search/cli/commands/init.py +212 -0
mcp_vector_search/cli/commands/search.py +395 -0
mcp_vector_search/cli/commands/status.py +340 -0
mcp_vector_search/cli/commands/watch.py +288 -0
mcp_vector_search/cli/main.py +117 -0
mcp_vector_search/cli/output.py +242 -0
mcp_vector_search/config/__init__.py +1 -0
mcp_vector_search/config/defaults.py +175 -0
mcp_vector_search/config/settings.py +108 -0
mcp_vector_search/core/__init__.py +1 -0
mcp_vector_search/core/database.py +431 -0
mcp_vector_search/core/embeddings.py +250 -0
mcp_vector_search/core/exceptions.py +66 -0
mcp_vector_search/core/indexer.py +310 -0
mcp_vector_search/core/models.py +174 -0
mcp_vector_search/core/project.py +304 -0
mcp_vector_search/core/search.py +324 -0
mcp_vector_search/core/watcher.py +320 -0
mcp_vector_search/mcp/__init__.py +1 -0
mcp_vector_search/parsers/__init__.py +1 -0
mcp_vector_search/parsers/base.py +180 -0
mcp_vector_search/parsers/javascript.py +238 -0
mcp_vector_search/parsers/python.py +407 -0
mcp_vector_search/parsers/registry.py +187 -0
mcp_vector_search/py.typed +1 -0
mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0

mcp_vector_search/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""MCP Vector Search - CLI-first semantic code search with MCP integration."""
+__version__ = "0.0.3"
+__author__ = "Robert Matsuoka"
+__email__ = "bobmatnyc@gmail.com"
+from .core.exceptions import MCPVectorSearchError
+__all__ = ["MCPVectorSearchError", "__version__"]

mcp_vector_search/cli/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""CLI module for MCP Vector Search."""

mcp_vector_search/cli/commands/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""CLI commands for MCP Vector Search."""

mcp_vector_search/cli/commands/config.py ADDED Viewed

@@ -0,0 +1,303 @@
+"""Config command for MCP Vector Search CLI."""
+from pathlib import Path
+from typing import Optional
+import typer
+from loguru import logger
+from ...core.exceptions import ConfigurationError, ProjectNotFoundError
+from ...core.project import ProjectManager
+from ..output import (
+    console,
+    print_config,
+    print_error,
+    print_info,
+    print_json,
+    print_success,
+)
+# Create config subcommand app
+config_app = typer.Typer(help="Manage project configuration")
+@config_app.command()
+def show(
+    ctx: typer.Context,
+    json_output: bool = typer.Option(
+        False,
+        "--json",
+        help="Output configuration in JSON format",
+    ),
+) -> None:
+    """Show current project configuration."""
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        project_manager = ProjectManager(project_root)
+        if not project_manager.is_initialized():
+            raise ProjectNotFoundError(
+                f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+            )
+        config = project_manager.load_config()
+        config_dict = config.dict()
+        if json_output:
+            print_json(config_dict, title="Project Configuration")
+        else:
+            console.print("[bold blue]Project Configuration[/bold blue]\n")
+            print_config(config_dict)
+    except ProjectNotFoundError as e:
+        print_error(str(e))
+        raise typer.Exit(1)
+    except Exception as e:
+        logger.error(f"Failed to show configuration: {e}")
+        print_error(f"Failed to show configuration: {e}")
+        raise typer.Exit(1)
+@config_app.command()
+def set(
+    ctx: typer.Context,
+    key: str = typer.Argument(..., help="Configuration key to set"),
+    value: str = typer.Argument(..., help="Configuration value"),
+) -> None:
+    """Set a configuration value.
+    Examples:
+        mcp-vector-search config set similarity_threshold 0.8
+        mcp-vector-search config set embedding_model microsoft/unixcoder-base
+        mcp-vector-search config set cache_embeddings true
+    """
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        project_manager = ProjectManager(project_root)
+        if not project_manager.is_initialized():
+            raise ProjectNotFoundError(
+                f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+            )
+        config = project_manager.load_config()
+        # Parse and validate the value
+        parsed_value = _parse_config_value(key, value)
+        # Update configuration
+        if hasattr(config, key):
+            setattr(config, key, parsed_value)
+            project_manager.save_config(config)
+            print_success(f"Set {key} = {parsed_value}")
+        else:
+            print_error(f"Unknown configuration key: {key}")
+            _show_available_keys()
+            raise typer.Exit(1)
+    except (ProjectNotFoundError, ConfigurationError) as e:
+        print_error(str(e))
+        raise typer.Exit(1)
+    except Exception as e:
+        logger.error(f"Failed to set configuration: {e}")
+        print_error(f"Failed to set configuration: {e}")
+        raise typer.Exit(1)
+@config_app.command()
+def get(
+    ctx: typer.Context,
+    key: str = typer.Argument(..., help="Configuration key to get"),
+) -> None:
+    """Get a specific configuration value."""
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        project_manager = ProjectManager(project_root)
+        if not project_manager.is_initialized():
+            raise ProjectNotFoundError(
+                f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+            )
+        config = project_manager.load_config()
+        if hasattr(config, key):
+            value = getattr(config, key)
+            console.print(f"[cyan]{key}[/cyan]: {value}")
+        else:
+            print_error(f"Unknown configuration key: {key}")
+            _show_available_keys()
+            raise typer.Exit(1)
+    except ProjectNotFoundError as e:
+        print_error(str(e))
+        raise typer.Exit(1)
+    except Exception as e:
+        logger.error(f"Failed to get configuration: {e}")
+        print_error(f"Failed to get configuration: {e}")
+        raise typer.Exit(1)
+@config_app.command()
+def reset(
+    ctx: typer.Context,
+    key: Optional[str] = typer.Argument(None, help="Configuration key to reset (resets all if not specified)"),
+    confirm: bool = typer.Option(
+        False,
+        "--yes",
+        "-y",
+        help="Skip confirmation prompt",
+    ),
+) -> None:
+    """Reset configuration to defaults."""
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        project_manager = ProjectManager(project_root)
+        if not project_manager.is_initialized():
+            raise ProjectNotFoundError(
+                f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+            )
+        if not confirm:
+            from ..output import confirm_action
+            if key:
+                message = f"Reset '{key}' to default value?"
+            else:
+                message = "Reset all configuration to defaults?"
+            if not confirm_action(message, default=False):
+                print_info("Reset cancelled")
+                raise typer.Exit(0)
+        if key:
+            # Reset specific key
+            config = project_manager.load_config()
+            default_value = _get_default_value(key)
+            if hasattr(config, key):
+                setattr(config, key, default_value)
+                project_manager.save_config(config)
+                print_success(f"Reset {key} to default value: {default_value}")
+            else:
+                print_error(f"Unknown configuration key: {key}")
+                raise typer.Exit(1)
+        else:
+            # Reset all configuration by re-initializing
+            from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS
+            config = project_manager.initialize(
+                file_extensions=DEFAULT_FILE_EXTENSIONS,
+                embedding_model=DEFAULT_EMBEDDING_MODELS["code"],
+                similarity_threshold=0.75,
+                force=True,
+            )
+            print_success("Reset all configuration to defaults")
+    except (ProjectNotFoundError, ConfigurationError) as e:
+        print_error(str(e))
+        raise typer.Exit(1)
+    except Exception as e:
+        logger.error(f"Failed to reset configuration: {e}")
+        print_error(f"Failed to reset configuration: {e}")
+        raise typer.Exit(1)
+@config_app.command("list-keys")
+def list_keys() -> None:
+    """List all available configuration keys."""
+    _show_available_keys()
+def _parse_config_value(key: str, value: str):
+    """Parse configuration value based on key type."""
+    # Boolean values
+    if key in ["cache_embeddings", "watch_files"]:
+        return value.lower() in ("true", "yes", "1", "on")
+    # Float values
+    if key in ["similarity_threshold"]:
+        try:
+            parsed = float(value)
+            if key == "similarity_threshold" and not (0.0 <= parsed <= 1.0):
+                raise ValueError("Similarity threshold must be between 0.0 and 1.0")
+            return parsed
+        except ValueError as e:
+            raise ConfigurationError(f"Invalid float value for {key}: {value}") from e
+    # Integer values
+    if key in ["max_chunk_size", "max_cache_size"]:
+        try:
+            parsed = int(value)
+            if parsed <= 0:
+                raise ValueError("Value must be positive")
+            return parsed
+        except ValueError as e:
+            raise ConfigurationError(f"Invalid integer value for {key}: {value}") from e
+    # List values
+    if key in ["file_extensions", "languages"]:
+        if value.startswith("[") and value.endswith("]"):
+            # JSON-style list
+            import json
+            try:
+                return json.loads(value)
+            except json.JSONDecodeError as e:
+                raise ConfigurationError(f"Invalid JSON list for {key}: {value}") from e
+        else:
+            # Comma-separated list
+            items = [item.strip() for item in value.split(",")]
+            if key == "file_extensions":
+                # Ensure extensions start with dot
+                items = [ext if ext.startswith(".") else f".{ext}" for ext in items]
+            return items
+    # Path values
+    if key in ["project_root", "index_path"]:
+        return Path(value)
+    # String values (default)
+    return value
+def _get_default_value(key: str):
+    """Get default value for a configuration key."""
+    from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS
+    defaults = {
+        "file_extensions": DEFAULT_FILE_EXTENSIONS,
+        "embedding_model": DEFAULT_EMBEDDING_MODELS["code"],
+        "similarity_threshold": 0.75,
+        "max_chunk_size": 512,
+        "languages": [],
+        "watch_files": False,
+        "cache_embeddings": True,
+        "max_cache_size": 1000,
+    }
+    return defaults.get(key, "")
+def _show_available_keys() -> None:
+    """Show all available configuration keys."""
+    console.print("\n[bold blue]Available Configuration Keys:[/bold blue]")
+    keys_info = [
+        ("file_extensions", "List of file extensions to index", "list"),
+        ("embedding_model", "Embedding model name", "string"),
+        ("similarity_threshold", "Similarity threshold (0.0-1.0)", "float"),
+        ("max_chunk_size", "Maximum chunk size in tokens", "integer"),
+        ("languages", "Detected programming languages", "list"),
+        ("watch_files", "Enable file watching", "boolean"),
+        ("cache_embeddings", "Enable embedding caching", "boolean"),
+        ("max_cache_size", "Maximum cache size", "integer"),
+    ]
+    for key, description, value_type in keys_info:
+        console.print(f"  [cyan]{key}[/cyan] ({value_type}): {description}")
+    console.print("\n[dim]Use 'mcp-vector-search config set <key> <value>' to change values[/dim]")
+if __name__ == "__main__":
+    config_app()

mcp_vector_search/cli/commands/index.py ADDED Viewed

@@ -0,0 +1,304 @@
+"""Index command for MCP Vector Search CLI."""
+import asyncio
+from pathlib import Path
+from typing import List, Optional
+import typer
+from loguru import logger
+from ...config.defaults import get_default_cache_path
+from ...core.database import ChromaVectorDatabase
+from ...core.embeddings import create_embedding_function, BatchEmbeddingProcessor
+from ...core.exceptions import ProjectNotFoundError
+from ...core.indexer import SemanticIndexer
+from ...core.project import ProjectManager
+from ..output import (
+    console,
+    create_progress,
+    print_error,
+    print_index_stats,
+    print_info,
+    print_success,
+)
+# Create index subcommand app
+index_app = typer.Typer(help="Index codebase for semantic search")
+@index_app.command()
+def main(
+    ctx: typer.Context,
+    watch: bool = typer.Option(
+        False,
+        "--watch",
+        "-w",
+        help="Watch for file changes and update index incrementally",
+    ),
+    incremental: bool = typer.Option(
+        True,
+        "--incremental/--full",
+        help="Use incremental indexing (skip unchanged files)",
+    ),
+    extensions: Optional[str] = typer.Option(
+        None,
+        "--extensions",
+        "-e",
+        help="Override file extensions to index (comma-separated)",
+    ),
+    force: bool = typer.Option(
+        False,
+        "--force",
+        "-f",
+        help="Force reindexing of all files",
+    ),
+    batch_size: int = typer.Option(
+        32,
+        "--batch-size",
+        "-b",
+        help="Batch size for embedding generation",
+        min=1,
+        max=128,
+    ),
+) -> None:
+    """Index your codebase for semantic search.
+    This command parses your code files using Tree-sitter, generates embeddings
+    using the configured model, and stores them in ChromaDB for fast semantic search.
+    Examples:
+        mcp-vector-search index
+        mcp-vector-search index --force --extensions .py,.js
+        mcp-vector-search index --watch
+    """
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        # Run async indexing
+        asyncio.run(run_indexing(
+            project_root=project_root,
+            watch=watch,
+            incremental=incremental,
+            extensions=extensions,
+            force_reindex=force,
+            batch_size=batch_size,
+            show_progress=True,
+        ))
+    except KeyboardInterrupt:
+        print_info("Indexing interrupted by user")
+        raise typer.Exit(0)
+    except Exception as e:
+        logger.error(f"Indexing failed: {e}")
+        print_error(f"Indexing failed: {e}")
+        raise typer.Exit(1)
+async def run_indexing(
+    project_root: Path,
+    watch: bool = False,
+    incremental: bool = True,
+    extensions: Optional[str] = None,
+    force_reindex: bool = False,
+    batch_size: int = 32,
+    show_progress: bool = True,
+) -> None:
+    """Run the indexing process."""
+    # Load project configuration
+    project_manager = ProjectManager(project_root)
+    if not project_manager.is_initialized():
+        raise ProjectNotFoundError(
+            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
+        )
+    config = project_manager.load_config()
+    # Override extensions if provided
+    file_extensions = config.file_extensions
+    if extensions:
+        file_extensions = [ext.strip() for ext in extensions.split(",")]
+        file_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in file_extensions]
+    print_info(f"Indexing project: {project_root}")
+    print_info(f"File extensions: {', '.join(file_extensions)}")
+    print_info(f"Embedding model: {config.embedding_model}")
+    # Setup embedding function and cache
+    cache_dir = get_default_cache_path(project_root) if config.cache_embeddings else None
+    embedding_function, cache = create_embedding_function(
+        model_name=config.embedding_model,
+        cache_dir=cache_dir,
+        cache_size=config.max_cache_size,
+    )
+    # Setup database
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+    # Setup indexer
+    indexer = SemanticIndexer(
+        database=database,
+        project_root=project_root,
+        file_extensions=file_extensions,
+    )
+    try:
+        async with database:
+            if watch:
+                await _run_watch_mode(indexer, show_progress)
+            else:
+                await _run_batch_indexing(indexer, force_reindex, show_progress)
+    except Exception as e:
+        logger.error(f"Indexing error: {e}")
+        raise
+async def _run_batch_indexing(
+    indexer: SemanticIndexer,
+    force_reindex: bool,
+    show_progress: bool,
+) -> None:
+    """Run batch indexing of all files."""
+    if show_progress:
+        with create_progress() as progress:
+            task = progress.add_task("Indexing files...", total=None)
+            # Start indexing
+            indexed_count = await indexer.index_project(
+                force_reindex=force_reindex,
+                show_progress=False,  # We handle progress here
+            )
+            progress.update(task, completed=indexed_count, total=indexed_count)
+    else:
+        indexed_count = await indexer.index_project(
+            force_reindex=force_reindex,
+            show_progress=show_progress,
+        )
+    print_success(f"Indexed {indexed_count} files")
+    # Show statistics
+    stats = await indexer.get_indexing_stats()
+    print_index_stats(stats)
+async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None:
+    """Run indexing in watch mode."""
+    print_info("Starting watch mode - press Ctrl+C to stop")
+    # TODO: Implement file watching with incremental updates
+    # This would use the watchdog library to monitor file changes
+    # and call indexer.reindex_file() for changed files
+    print_error("Watch mode not yet implemented")
+    raise NotImplementedError("Watch mode will be implemented in Phase 1B")
+@index_app.command("reindex")
+def reindex_file(
+    ctx: typer.Context,
+    file_path: Path = typer.Argument(
+        ...,
+        help="File to reindex",
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+        readable=True,
+    ),
+) -> None:
+    """Reindex a specific file."""
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        asyncio.run(_reindex_single_file(project_root, file_path))
+    except Exception as e:
+        logger.error(f"Reindexing failed: {e}")
+        print_error(f"Reindexing failed: {e}")
+        raise typer.Exit(1)
+async def _reindex_single_file(project_root: Path, file_path: Path) -> None:
+    """Reindex a single file."""
+    # Load project configuration
+    project_manager = ProjectManager(project_root)
+    config = project_manager.load_config()
+    # Setup components
+    embedding_function, cache = create_embedding_function(
+        model_name=config.embedding_model,
+        cache_dir=get_default_cache_path(project_root) if config.cache_embeddings else None,
+    )
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+    indexer = SemanticIndexer(
+        database=database,
+        project_root=project_root,
+        file_extensions=config.file_extensions,
+    )
+    async with database:
+        success = await indexer.reindex_file(file_path)
+        if success:
+            print_success(f"Reindexed: {file_path}")
+        else:
+            print_error(f"Failed to reindex: {file_path}")
+@index_app.command("clean")
+def clean_index(
+    ctx: typer.Context,
+    confirm: bool = typer.Option(
+        False,
+        "--yes",
+        "-y",
+        help="Skip confirmation prompt",
+    ),
+) -> None:
+    """Clean the search index (remove all indexed data)."""
+    try:
+        project_root = ctx.obj.get("project_root") or Path.cwd()
+        if not confirm:
+            from ..output import confirm_action
+            if not confirm_action("This will delete all indexed data. Continue?", default=False):
+                print_info("Clean operation cancelled")
+                raise typer.Exit(0)
+        asyncio.run(_clean_index(project_root))
+    except Exception as e:
+        logger.error(f"Clean failed: {e}")
+        print_error(f"Clean failed: {e}")
+        raise typer.Exit(1)
+async def _clean_index(project_root: Path) -> None:
+    """Clean the search index."""
+    project_manager = ProjectManager(project_root)
+    config = project_manager.load_config()
+    # Setup database
+    embedding_function, _ = create_embedding_function(config.embedding_model)
+    database = ChromaVectorDatabase(
+        persist_directory=config.index_path,
+        embedding_function=embedding_function,
+    )
+    async with database:
+        await database.reset()
+        print_success("Index cleaned successfully")
+if __name__ == "__main__":
+    index_app()