PyPI - code-memory - Versions diffs - 0.1.0__py3-none-any.whl - Mend

code-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

.github/workflows/ci.yml +71 -0
.github/workflows/publish.yml +33 -0
.gitignore +40 -0
.python-version +1 -0
CHANGELOG.md +43 -0
CONTRIBUTING.md +133 -0
LICENSE +21 -0
Makefile +33 -0
PKG-INFO +275 -0
README.md +233 -0
code_memory-0.1.0.dist-info/METADATA +275 -0
code_memory-0.1.0.dist-info/RECORD +37 -0
code_memory-0.1.0.dist-info/WHEEL +4 -0
code_memory-0.1.0.dist-info/entry_points.txt +2 -0
code_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
db.py +403 -0
doc_parser.py +494 -0
errors.py +115 -0
git_search.py +313 -0
logging_config.py +191 -0
parser.py +392 -0
prompts/milestone_1.xml +62 -0
prompts/milestone_2.xml +246 -0
prompts/milestone_3.xml +214 -0
prompts/milestone_4.xml +453 -0
prompts/milestone_5.xml +599 -0
pyproject.toml +92 -0
queries.py +446 -0
server.py +299 -0
tests/__init__.py +1 -0
tests/conftest.py +192 -0
tests/test_errors.py +112 -0
tests/test_logging.py +169 -0
tests/test_tools.py +114 -0
tests/test_validation.py +216 -0
uv.lock +1921 -0
validation.py +316 -0

server.py ADDED Viewed

@@ -0,0 +1,299 @@
+"""
+code-memory MCP Server
+A deterministic, high-precision code intelligence layer exposed via the
+Model Context Protocol (MCP).  Uses a "Progressive Disclosure" routing
+architecture:
+    1. "Who/Why?" → search_history  (Git data)
+    2. "Where/What?" → search_code  (AST data + hybrid retrieval)
+    3. "How?" → search_docs         (Semantic / Fuzzy logic)
+"""
+from __future__ import annotations
+from typing import Literal
+from mcp.server.fastmcp import FastMCP
+import db as db_mod
+import doc_parser as doc_parser_mod
+import errors
+import logging_config
+import parser as parser_mod
+import queries
+import validation as val
+# ── Initialize logging ───────────────────────────────────────────────────
+# Both calls run at import time, before any tool function is defined.
+logger = logging_config.setup_logging()
+# Logger requested under the name "tools"; it is not referenced again in the
+# visible part of this module.
+tool_logger = logging_config.get_logger("tools")
+# ── Initialize the FastMCP server ────────────────────────────────────────
+# Server instance named "code-memory". The @mcp.tool() decorators in this
+# module register their functions on it, and main() starts it via mcp.run().
+mcp = FastMCP("code-memory")
+# ── Tool 1: search_code ───────────────────────────────────────────────────
+@mcp.tool()
+def search_code(
+    query: str,
+    search_type: Literal["definition", "references", "file_structure"],
+) -> dict:
+    """Search the indexed codebase for definitions, references, or file
+    structure.
+    Uses hybrid retrieval (BM25 keyword search + dense vector semantic
+    search) with Reciprocal Rank Fusion for definition queries.
+    - **definition**: Find where a symbol is defined (hybrid search).
+    - **references**: Find all cross-references to a symbol name.
+    - **file_structure**: List all symbols in a file, ordered by line.
+    Run ``index_codebase`` first to populate the search index."""
+    with logging_config.ToolLogger("search_code", query=query, search_type=search_type) as log:
+        try:
+            # Validate inputs
+            query = val.validate_query(query)
+            search_type = val.validate_search_type(
+                search_type, ["definition", "references", "file_structure"]
+            )
+            database = db_mod.get_db()
+            if search_type == "definition":
+                results = queries.find_definition(query, database)
+                log.set_result_count(len(results))
+                return {"status": "ok", "search_type": "definition", "query": query, "results": results}
+            elif search_type == "references":
+                results = queries.find_references(query, database)
+                log.set_result_count(len(results))
+                return {"status": "ok", "search_type": "references", "query": query, "results": results}
+            elif search_type == "file_structure":
+                results = queries.get_file_structure(query, database)
+                log.set_result_count(len(results))
+                return {"status": "ok", "search_type": "file_structure", "query": query, "results": results}
+            return errors.format_error(errors.ValidationError(f"Unknown search_type: {search_type}"))
+        except errors.CodeMemoryError as e:
+            return e.to_dict()
+        except Exception as e:
+            return errors.format_error(e)
+# ── Tool 2: index_codebase ────────────────────────────────────────────────
+@mcp.tool()
+def index_codebase(directory: str = ".") -> dict:
+    """Indexes or re-indexes source files and documentation in the given directory.
+    Run this before using search_code or search_docs to ensure the database
+    is up to date. Uses tree-sitter for language-agnostic structural extraction
+    and generates embeddings for semantic search. Supports Python, JavaScript/
+    TypeScript, Java, Kotlin, Go, Rust, C/C++, Ruby, and more.
+    Also indexes markdown documentation files and extracts docstrings from
+    indexed code symbols. Unchanged files (by mtime) are automatically skipped.
+    Args:
+        directory: The root directory to index (recursively).
+    Returns:
+        Summary of indexing results including code and documentation stats.
+    """
+    with logging_config.ToolLogger("index_codebase", directory=directory) as log:
+        try:
+            # Validate directory
+            directory_path = val.validate_directory(directory)
+            database = db_mod.get_db()
+            # Index code files
+            code_logger = logging_config.IndexingLogger("code")
+            code_logger.start(str(directory_path))
+            code_results = parser_mod.index_directory(str(directory_path), database)
+            for r in code_results:
+                if r.get("skipped"):
+                    code_logger.file_skipped(r.get("file", "unknown"), r.get("reason", "unknown"))
+                else:
+                    code_logger.file_indexed(r.get("file", "unknown"), r.get("symbols_indexed", 0))
+            code_logger.complete()
+            indexed = [r for r in code_results if not r.get("skipped")]
+            skipped = [r for r in code_results if r.get("skipped")]
+            # Index documentation files
+            doc_logger = logging_config.IndexingLogger("documentation")
+            doc_logger.start(str(directory_path))
+            doc_results = doc_parser_mod.index_doc_directory(str(directory_path), database)
+            for r in doc_results:
+                if r.get("skipped"):
+                    doc_logger.file_skipped(r.get("file", "unknown"), r.get("reason", "unknown"))
+                else:
+                    doc_logger.file_indexed(r.get("file", "unknown"), r.get("chunks_indexed", 0))
+            doc_logger.complete()
+            doc_indexed = [r for r in doc_results if not r.get("skipped")]
+            doc_skipped = [r for r in doc_results if r.get("skipped")]
+            # Extract docstrings from indexed code
+            docstring_results = doc_parser_mod.extract_docstrings_from_code(database)
+            total_symbols = sum(r.get("symbols_indexed", 0) for r in indexed)
+            total_chunks = sum(r.get("chunks_indexed", 0) for r in doc_indexed)
+            log.set_result_count(total_symbols + total_chunks + len(docstring_results))
+            return {
+                "status": "ok",
+                "directory": str(directory_path),
+                "code": {
+                    "files_indexed": len(indexed),
+                    "files_skipped": len(skipped),
+                    "total_symbols": total_symbols,
+                    "total_references": sum(r.get("references_indexed", 0) for r in indexed),
+                },
+                "documentation": {
+                    "files_indexed": len(doc_indexed),
+                    "files_skipped": len(doc_skipped),
+                    "total_chunks": total_chunks,
+                    "docstrings_extracted": len(docstring_results),
+                },
+                "details": {
+                    "code": indexed,
+                    "docs": doc_indexed,
+                },
+            }
+        except errors.CodeMemoryError as e:
+            return e.to_dict()
+        except Exception as e:
+            return errors.format_error(e)
+# ── Tool 3: search_docs ────────────────────────────────────────────────────
+@mcp.tool()
+def search_docs(query: str, top_k: int = 10) -> dict:
+    """Use this tool to understand the codebase conceptually. Ideal for
+    'how does X work?', 'explain the architecture', or finding standard
+    operating procedures in the documentation.
+    Uses hybrid retrieval (BM25 keyword search + dense vector semantic
+    search) with Reciprocal Rank Fusion over markdown documentation,
+    README files, and docstrings extracted from code.
+    Args:
+        query: A natural language question about the codebase.
+        top_k: Maximum number of results to return (default 10).
+    Returns:
+        Dictionary with 'results' key containing matching documentation
+        chunks, each with source attribution (file, section, line numbers)
+        and relevance score.
+    """
+    with logging_config.ToolLogger("search_docs", query=query, top_k=top_k) as log:
+        try:
+            # Validate inputs
+            query = val.validate_query(query)
+            top_k = val.validate_top_k(top_k)
+            database = db_mod.get_db()
+            results = queries.search_documentation(query, database, top_k=top_k)
+            log.set_result_count(len(results))
+            return {
+                "status": "ok",
+                "query": query,
+                "results": results,
+                "count": len(results),
+            }
+        except errors.CodeMemoryError as e:
+            return e.to_dict()
+        except Exception as e:
+            return errors.format_error(e)
+# ── Tool 4: search_history ─────────────────────────────────────────────────
+@mcp.tool()
+def search_history(
+    query: str,
+    search_type: Literal["commits", "file_history", "blame", "commit_detail"] = "commits",
+    target_file: str | None = None,
+    line_start: int | None = None,
+    line_end: int | None = None,
+) -> dict:
+    """Search local Git history to debug regressions, understand developer
+    intent, or find out WHY a specific change was made.
+    **search_type options:**
+    - ``commits`` — Search commit messages for *query* (case-insensitive).
+      Optionally filter to commits that touched *target_file*.
+    - ``file_history`` — Show the commit log for *target_file* (follows
+      renames).  *target_file* is required; *query* is ignored.
+    - ``blame`` — Run ``git blame`` on *target_file*, optionally limited to
+      *line_start*–*line_end*.  *target_file* is required.
+    - ``commit_detail`` — Get full metadata and diff for one commit.
+      Pass the commit hash as *query*.  Optionally set *target_file* to
+      restrict the diff to that file.
+    """
+    with logging_config.ToolLogger("search_history", query=query, search_type=search_type,
+                                   target_file=target_file) as log:
+        try:
+            import git_search as gs
+            from git.exc import InvalidGitRepositoryError, NoSuchPathError
+            # Validate inputs
+            search_type = val.validate_search_type(
+                search_type, ["commits", "file_history", "blame", "commit_detail"]
+            )
+            line_start, line_end = val.validate_line_range(line_start, line_end)
+            # Get git repository
+            try:
+                repo = gs.get_repo(".")
+            except (InvalidGitRepositoryError, NoSuchPathError) as exc:
+                raise errors.GitError(f"Git repository not found: {exc}")
+            if search_type == "commits":
+                query = val.validate_query(query, min_length=1)
+                results = gs.search_commits(repo, query, target_file)
+                log.set_result_count(len(results))
+                return {"status": "ok", "search_type": "commits", "query": query, "results": results}
+            elif search_type == "file_history":
+                if not target_file:
+                    raise errors.ValidationError("target_file is required for file_history search")
+                results = gs.get_file_history(repo, target_file)
+                log.set_result_count(len(results))
+                return {"status": "ok", "search_type": "file_history", "target_file": target_file, "results": results}
+            elif search_type == "blame":
+                if not target_file:
+                    raise errors.ValidationError("target_file is required for blame search")
+                results = gs.get_blame(repo, target_file, line_start, line_end)
+                log.set_result_count(len(results))
+                return {"status": "ok", "search_type": "blame", "target_file": target_file, "results": results}
+            elif search_type == "commit_detail":
+                result = gs.get_commit_detail(repo, query, target_file)
+                return {"status": "ok", "search_type": "commit_detail", "result": result}
+            return errors.format_error(errors.ValidationError(f"Unknown search_type: {search_type}"))
+        except errors.CodeMemoryError as e:
+            return e.to_dict()
+        except Exception as e:
+            return errors.format_error(e)
+# ── Entrypoint ────────────────────────────────────────────────────────────
+def main() -> None:
+    """Entry point for the MCP server when installed as a package.
+    Starts the module-level FastMCP instance by calling ``mcp.run()``.
+    """
+    mcp.run()
+# Also start the server when this file is executed directly as a script.
+if __name__ == "__main__":
+    main()

tests/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Test suite for code-memory."""

tests/conftest.py ADDED Viewed

@@ -0,0 +1,192 @@
+"""
+Shared test fixtures for code-memory tests.
+"""
+from __future__ import annotations
+import os
+import sys
+import sqlite3
+import tempfile
+from pathlib import Path
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import pytest
+@pytest.fixture
+def temp_db():
+    """Provide a temporary in-memory database for tests."""
+    # Use a temporary file for sqlite-vec compatibility
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        db_path = f.name
+    db = sqlite3.connect(db_path)
+    # Load sqlite-vec
+    try:
+        import sqlite_vec
+        db.enable_load_extension(True)
+        sqlite_vec.load(db)
+        db.enable_load_extension(False)
+    except ImportError:
+        pass  # sqlite-vec not available, skip vector tests
+    yield db
+    db.close()
+    os.unlink(db_path)
+@pytest.fixture
+def temp_dir():
+    """Provide a temporary directory for file tests."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yield Path(tmpdir)
+@pytest.fixture
+def sample_python_file(temp_dir):
+    """Create a sample Python file for parsing tests."""
+    code = '''
+"""Module docstring for testing."""
+import os
+from typing import Optional
+class SampleClass:
+    """A sample class for testing."""
+    def __init__(self, name: str):
+        """Initialize the sample class."""
+        self.name = name
+    def get_name(self) -> str:
+        """Return the name."""
+        return self.name
+    def process_data(self, data: Optional[dict] = None) -> dict:
+        """Process some data."""
+        if data is None:
+            data = {}
+        return {"name": self.name, **data}
+def standalone_function(x: int, y: int) -> int:
+    """A standalone function that adds two numbers."""
+    return x + y
+def another_function(text: str) -> str:
+    """Another function for testing."""
+    return text.upper()
+'''
+    filepath = temp_dir / "sample.py"
+    filepath.write_text(code)
+    return filepath
+@pytest.fixture
+def sample_markdown_file(temp_dir):
+    """Create a sample markdown file for documentation tests."""
+    content = """# Sample Documentation
+This is a sample documentation file for testing.
+## Installation
+To install, run:
+```bash
+pip install code-memory
+```
+## Usage
+Here's how to use the tool:
+1. Index your codebase
+2. Search for symbols
+## Architecture
+The system uses a Progressive Disclosure architecture.
+### Components
+- search_code: Find definitions
+- search_docs: Find documentation
+- search_history: Search git history
+"""
+    filepath = temp_dir / "README.md"
+    filepath.write_text(content)
+    return filepath
+@pytest.fixture
+def temp_git_repo(temp_dir):
+    """Provide a temporary git repository for tests."""
+    import subprocess
+    # Initialize git repo
+    subprocess.run(["git", "init"], cwd=temp_dir, check=True, capture_output=True)
+    subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=temp_dir, check=True, capture_output=True)
+    subprocess.run(["git", "config", "user.name", "Test User"], cwd=temp_dir, check=True, capture_output=True)
+    # Create and commit a file
+    test_file = temp_dir / "test.py"
+    test_file.write_text("# Test file\nprint('hello')\n")
+    subprocess.run(["git", "add", "."], cwd=temp_dir, check=True, capture_output=True)
+    subprocess.run(["git", "commit", "-m", "Initial commit"], cwd=temp_dir, check=True, capture_output=True)
+    yield temp_dir
+@pytest.fixture
+def sample_symbols_db(temp_db):
+    """Provide a database with sample symbols for search tests."""
+    # Create minimal schema
+    temp_db.execute("""
+        CREATE TABLE IF NOT EXISTS files (
+            id INTEGER PRIMARY KEY,
+            path TEXT UNIQUE NOT NULL,
+            last_modified REAL NOT NULL,
+            file_hash TEXT NOT NULL
+        )
+    """)
+    temp_db.execute("""
+        CREATE TABLE IF NOT EXISTS symbols (
+            id INTEGER PRIMARY KEY,
+            name TEXT NOT NULL,
+            kind TEXT NOT NULL,
+            file_id INTEGER NOT NULL,
+            line_start INTEGER NOT NULL,
+            line_end INTEGER NOT NULL,
+            parent_symbol_id INTEGER,
+            source_text TEXT NOT NULL
+        )
+    """)
+    # Insert sample data
+    temp_db.execute("INSERT INTO files (path, last_modified, file_hash) VALUES (?, ?, ?)",
+                    ("/test/sample.py", 0.0, "abc123"))
+    file_id = temp_db.lastrowid
+    symbols = [
+        ("SampleClass", "class", file_id, 5, 20, None, "class SampleClass: ..."),
+        ("__init__", "method", file_id, 8, 10, 1, "def __init__(self, name): ..."),
+        ("get_name", "method", file_id, 12, 14, 1, "def get_name(self): ..."),
+        ("standalone_function", "function", file_id, 22, 24, None, "def standalone_function(x, y): ..."),
+    ]
+    for name, kind, fid, line_start, line_end, parent, source in symbols:
+        temp_db.execute(
+            "INSERT INTO symbols (name, kind, file_id, line_start, line_end, parent_symbol_id, source_text) VALUES (?, ?, ?, ?, ?, ?, ?)",
+            (name, kind, fid, line_start, line_end, parent, source)
+        )
+    temp_db.commit()
+    return temp_db

tests/test_errors.py ADDED Viewed

@@ -0,0 +1,112 @@
+"""Tests for error handling module."""
+from __future__ import annotations
+import pytest
+from errors import (
+    CodeMemoryError,
+    DatabaseError,
+    IndexingError,
+    GitError,
+    ValidationError,
+    EmbeddingError,
+    format_error,
+)
+class TestCodeMemoryError:
+    """Tests for base CodeMemoryError class."""
+    def test_basic_error(self):
+        """Test basic error creation."""
+        error = CodeMemoryError("Test error")
+        assert error.message == "Test error"
+        assert str(error) == "Test error"
+    def test_error_with_details(self):
+        """Test error with additional details."""
+        error = CodeMemoryError("Test error", {"key": "value"})
+        assert error.details == {"key": "value"}
+    def test_to_dict(self):
+        """Test conversion to dict."""
+        error = CodeMemoryError("Test error")
+        result = error.to_dict()
+        assert result["error"] is True
+        assert result["error_type"] == "CodeMemoryError"
+        assert result["message"] == "Test error"
+    def test_to_dict_with_details(self):
+        """Test conversion to dict with details."""
+        error = CodeMemoryError("Test error", {"key": "value"})
+        result = error.to_dict()
+        assert result["details"] == {"key": "value"}
+    def test_to_dict_without_details(self):
+        """Test that None details are preserved."""
+        error = CodeMemoryError("Test error", {})
+        result = error.to_dict()
+        assert result["details"] is None
+class TestSpecializedErrors:
+    """Tests for specialized error classes."""
+    def test_database_error(self):
+        """Test DatabaseError."""
+        error = DatabaseError("Connection failed")
+        assert isinstance(error, CodeMemoryError)
+        assert error.to_dict()["error_type"] == "DatabaseError"
+    def test_indexing_error(self):
+        """Test IndexingError."""
+        error = IndexingError("Parse failed")
+        assert isinstance(error, CodeMemoryError)
+        assert error.to_dict()["error_type"] == "IndexingError"
+    def test_git_error(self):
+        """Test GitError."""
+        error = GitError("Not a git repo")
+        assert isinstance(error, CodeMemoryError)
+        assert error.to_dict()["error_type"] == "GitError"
+    def test_validation_error(self):
+        """Test ValidationError."""
+        error = ValidationError("Invalid input")
+        assert isinstance(error, CodeMemoryError)
+        assert error.to_dict()["error_type"] == "ValidationError"
+    def test_embedding_error(self):
+        """Test EmbeddingError."""
+        error = EmbeddingError("Model load failed")
+        assert isinstance(error, CodeMemoryError)
+        assert error.to_dict()["error_type"] == "EmbeddingError"
+class TestFormatError:
+    """Tests for format_error function."""
+    def test_format_code_memory_error(self):
+        """Test formatting CodeMemoryError."""
+        error = ValidationError("Invalid input", {"field": "query"})
+        result = format_error(error)
+        assert result["error"] is True
+        assert result["error_type"] == "ValidationError"
+        assert result["message"] == "Invalid input"
+        assert result["details"] == {"field": "query"}
+    def test_format_builtin_exception(self):
+        """Test formatting built-in exceptions."""
+        error = ValueError("Something went wrong")
+        result = format_error(error)
+        assert result["error"] is True
+        assert result["error_type"] == "ValueError"
+        assert result["message"] == "Something went wrong"
+    def test_format_exception_without_message(self):
+        """Test formatting exception with empty message."""
+        error = RuntimeError()
+        result = format_error(error)
+        assert result["error"] is True
+        assert "RuntimeError" in result["message"]