code_context_mcp-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. code_context/__init__.py +3 -0
  2. code_context/_background.py +93 -0
  3. code_context/_composition.py +425 -0
  4. code_context/_watcher.py +89 -0
  5. code_context/adapters/__init__.py +0 -0
  6. code_context/adapters/driven/__init__.py +0 -0
  7. code_context/adapters/driven/chunker_dispatcher.py +43 -0
  8. code_context/adapters/driven/chunker_line.py +54 -0
  9. code_context/adapters/driven/chunker_treesitter.py +215 -0
  10. code_context/adapters/driven/chunker_treesitter_queries.py +111 -0
  11. code_context/adapters/driven/code_source_fs.py +122 -0
  12. code_context/adapters/driven/embeddings_local.py +111 -0
  13. code_context/adapters/driven/embeddings_openai.py +58 -0
  14. code_context/adapters/driven/git_source_cli.py +211 -0
  15. code_context/adapters/driven/introspector_fs.py +224 -0
  16. code_context/adapters/driven/keyword_index_sqlite.py +206 -0
  17. code_context/adapters/driven/reranker_crossencoder.py +61 -0
  18. code_context/adapters/driven/symbol_index_sqlite.py +264 -0
  19. code_context/adapters/driven/vector_store_numpy.py +119 -0
  20. code_context/adapters/driving/__init__.py +0 -0
  21. code_context/adapters/driving/mcp_server.py +365 -0
  22. code_context/cli.py +161 -0
  23. code_context/config.py +114 -0
  24. code_context/domain/__init__.py +0 -0
  25. code_context/domain/index_bus.py +52 -0
  26. code_context/domain/models.py +140 -0
  27. code_context/domain/ports.py +205 -0
  28. code_context/domain/use_cases/__init__.py +0 -0
  29. code_context/domain/use_cases/explain_diff.py +98 -0
  30. code_context/domain/use_cases/find_definition.py +30 -0
  31. code_context/domain/use_cases/find_references.py +22 -0
  32. code_context/domain/use_cases/get_file_tree.py +36 -0
  33. code_context/domain/use_cases/get_summary.py +24 -0
  34. code_context/domain/use_cases/indexer.py +336 -0
  35. code_context/domain/use_cases/recent_changes.py +36 -0
  36. code_context/domain/use_cases/search_repo.py +131 -0
  37. code_context/server.py +151 -0
  38. code_context_mcp-1.0.0.dist-info/METADATA +181 -0
  39. code_context_mcp-1.0.0.dist-info/RECORD +43 -0
  40. code_context_mcp-1.0.0.dist-info/WHEEL +5 -0
  41. code_context_mcp-1.0.0.dist-info/entry_points.txt +3 -0
  42. code_context_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
  43. code_context_mcp-1.0.0.dist-info/top_level.txt +1 -0
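
The layout is a ports-and-adapters (hexagonal) split: domain/ports.py declares the interfaces, adapters/driven/ holds the infrastructure implementations, adapters/driving/mcp_server.py exposes them over MCP, and _composition.py is the composition root. As a hedged illustration only, the protocol below is hypothetical and not the actual contents of ports.py; only the module paths come from the listing above:

from typing import Protocol

from code_context.domain.models import SymbolDef


class SymbolIndex(Protocol):
    # Hypothetical port, sketched from the adapter shown below.
    def add_definitions(self, defs) -> None: ...
    def find_definition(
        self, name: str, language: str | None = None, max_count: int = 5
    ) -> list[SymbolDef]: ...


def build_symbol_index() -> SymbolIndex:
    # The composition root picks the concrete driven adapter.
    from code_context.adapters.driven.symbol_index_sqlite import SymbolIndexSqlite

    return SymbolIndexSqlite()
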
code_context/adapters/driven/symbol_index_sqlite.py
@@ -0,0 +1,264 @@
+"""SymbolIndexSqlite — SQLite-backed adapter for SymbolIndex.
+
+Stores symbol definitions in an indexed table, references in an FTS5 table
+that's a peer of (but distinct from) Sprint 3's keyword chunks table. This
+adapter persists to its own file (`symbols.sqlite`) for isolation; if the
+composition root harmonises file sharing in a future task, only this
+constant changes.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import sqlite3
+from collections.abc import Iterable
+from pathlib import Path
+
+from code_context.domain.models import SymbolDef, SymbolRef
+
+log = logging.getLogger(__name__)
+
+_FILE = "symbols.sqlite"
+_DEFS_TABLE = "symbol_defs"
+_REFS_TABLE = "symbol_refs_fts"
+
+# FTS5 query sanitisation — same logic as keyword_index_sqlite.py.
+# Strip punctuation (FTS5 parses `.`, `-`, `:`, etc. as syntax even
+# though the unicode61 tokenizer accepts them in indexed text), and
+# strip the boolean operators.
+_FTS_KEEP_RE = re.compile(r"[^\w\s]", flags=re.UNICODE)
+_FTS_BOOLEAN_RE = re.compile(r"\b(AND|OR|NOT|NEAR)\b", re.IGNORECASE)
+
+
+class SymbolIndexSqlite:
+    """Default SymbolIndex adapter — definitions + references via SQLite + FTS5."""
+
+    @property
+    def version(self) -> str:
+        return f"symbols-sqlite-{sqlite3.sqlite_version}-v1"
+
+    def __init__(self) -> None:
+        self._conn: sqlite3.Connection | None = None
+        self._db_path: Path | None = None
+        self._open_inmem()
+
+    # ---------- public ----------
+
+    def add_definitions(self, defs: Iterable[SymbolDef]) -> None:
+        assert self._conn is not None
+        rows = [(d.name, d.path, d.lines[0], d.lines[1], d.kind, d.language) for d in defs]
+        if not rows:
+            return
+        self._conn.executemany(
+            f"INSERT INTO {_DEFS_TABLE} "
+            "(name, path, line_start, line_end, kind, language) "
+            "VALUES (?, ?, ?, ?, ?, ?)",
+            rows,
+        )
+        self._conn.commit()
+
+    def add_references(self, refs: Iterable[tuple[str, int, str]]) -> None:
+        """Bulk-insert reference rows into the FTS5 references table.
+
+        Each row is (path, line, snippet). Snippet is FTS5-indexed; path and
+        line are UNINDEXED. IndexerUseCase feeds chunk snippets here so that
+        find_references has rows to MATCH against later.
+        """
+        assert self._conn is not None
+        rows = list(refs)
+        if not rows:
+            return
+        self._conn.executemany(
+            f"INSERT INTO {_REFS_TABLE} (path, line, snippet) VALUES (?, ?, ?)",
+            rows,
+        )
+        self._conn.commit()
+
+    def delete_by_path(self, path: str) -> int:
+        """Remove every row whose path == `path` from BOTH symbol_defs
+        and symbol_refs_fts. Returns the total rowcount across the two
+        tables. Used by Sprint 6 incremental reindex."""
+        assert self._conn is not None
+        defs_cur = self._conn.execute(f"DELETE FROM {_DEFS_TABLE} WHERE path = ?", (path,))
+        refs_cur = self._conn.execute(f"DELETE FROM {_REFS_TABLE} WHERE path = ?", (path,))
+        self._conn.commit()
+        return defs_cur.rowcount + refs_cur.rowcount
+
+    def find_definition(
+        self,
+        name: str,
+        language: str | None = None,
+        max_count: int = 5,
+    ) -> list[SymbolDef]:
+        assert self._conn is not None
+        if language:
+            cur = self._conn.execute(
+                f"SELECT name, path, line_start, line_end, kind, language "
+                f"FROM {_DEFS_TABLE} WHERE name = ? AND language = ? LIMIT ?",
+                (name, language, max_count),
+            )
+        else:
+            cur = self._conn.execute(
+                f"SELECT name, path, line_start, line_end, kind, language "
+                f"FROM {_DEFS_TABLE} WHERE name = ? LIMIT ?",
+                (name, max_count),
+            )
+        return [
+            SymbolDef(
+                name=row[0],
+                path=row[1],
+                lines=(row[2], row[3]),
+                kind=row[4],
+                language=row[5],
+            )
+            for row in cur.fetchall()
+        ]
+
+    def find_references(self, name: str, max_count: int = 50) -> list[SymbolRef]:
+        """FTS5 MATCH for the symbol, then expand each chunk to per-line hits.
+
+        FTS5 stores chunk-level rows (path, chunk_start_line, full_chunk_snippet);
+        we want one SymbolRef per LINE that contains the symbol — that's the
+        contract from tool-protocol.md ("snippet: the matching line, trimmed").
+        Two reasons we do it this way:
+
+        1. **Contract**: SymbolRef.snippet is "the matching line, trimmed", not
+           "the chunk that contains the matching line". Returning chunks blew
+           past Claude Code's MCP-tool token budget on the very first smoke
+           test (a single find_references call returned ~100KB of output).
+        2. **Word boundary**: FTS5's unicode61 tokenizer treats `log` and
+           `logger` as different tokens, so MATCH 'log' won't match 'logger'.
+           But it WILL match `log_format` (split on underscore). The
+           per-line `\\bname\\b` filter catches that and skips lines where
+           `name` only appears as part of a longer identifier.
+        """
+        assert self._conn is not None
+        sanitised = _sanitise(name)
+        if not sanitised:
+            return []
+        try:
+            cur = self._conn.execute(
+                f"SELECT path, line, snippet FROM {_REFS_TABLE} "
+                f"WHERE {_REFS_TABLE} MATCH ? LIMIT ?",
+                (sanitised, max_count * 4),  # over-fetch; per-line expand trims.
+            )
+        except sqlite3.OperationalError as exc:
+            log.warning("symbol refs query failed (%s) for %r → []", exc, name)
+            return []
+        word_re = re.compile(rf"\b{re.escape(name)}\b")
+        out: list[SymbolRef] = []
+        seen: set[tuple[str, int]] = set()
+        for path, chunk_start_line, chunk_snippet in cur.fetchall():
+            for offset, line_text in enumerate(chunk_snippet.splitlines() or [chunk_snippet]):
+                if not word_re.search(line_text):
+                    continue
+                actual_line = int(chunk_start_line) + offset
+                key = (path, actual_line)
+                if key in seen:
+                    continue  # Same line emitted by overlapping chunks.
+                seen.add(key)
+                trimmed = line_text.strip()[:200]
+                out.append(SymbolRef(path=path, line=actual_line, snippet=trimmed))
+                if len(out) >= max_count:
+                    return out
+        return out
+
+    def persist(self, path: Path) -> None:
+        assert self._conn is not None
+        path.mkdir(parents=True, exist_ok=True)
+        target = path / _FILE
+        # Commit any open implicit transaction first — backup() blocks on
+        # uncommitted writes in the source connection (Windows specifically).
+        self._conn.commit()
+        disk = sqlite3.connect(target, check_same_thread=False)
+        try:
+            self._conn.backup(disk)
+        finally:
+            # sqlite3.Connection's context manager only commits, doesn't close.
+            # Explicit close so Windows releases the file lock for tmp_path
+            # cleanup. Mirrors the same fix in keyword_index_sqlite.py.
+            disk.close()
+        self._db_path = target
+
+    def load(self, path: Path) -> None:
+        """Restore the symbol index from `<path>/symbols.sqlite` into a
+        fresh in-memory connection. Mirrors keyword_index_sqlite.load —
+        Sprint 6 needs mutations after load to stay in RAM so they don't
+        corrupt the active on-disk index AND a subsequent persist() to
+        the same dir doesn't deadlock on SQLite's backup-to-itself."""
+        target = path / _FILE
+        if not target.exists():
+            raise FileNotFoundError(f"symbol index missing at {target}")
+        if self._conn is not None:
+            self._conn.close()
+        # check_same_thread=False — see _open_inmem rationale.
+        self._conn = sqlite3.connect(":memory:", check_same_thread=False)
+        disk = sqlite3.connect(target, check_same_thread=False)
+        try:
+            disk.backup(self._conn)
+        finally:
+            disk.close()
+        self._db_path = target
+
+    # ---------- test helpers ----------
+
+    def populate_references_for_test(self, rows: list[tuple[str, int, str]]) -> None:
+        """Inject rows into the references FTS5 table for unit testing.
+
+        Bypasses the IndexerUseCase pipeline that normally feeds this table
+        from the chunker output. Production callers should NOT use this; it's
+        exposed because writing through the public API requires running the
+        whole pipeline.
+        """
+        assert self._conn is not None
+        self._conn.executemany(
+            f"INSERT INTO {_REFS_TABLE} (path, line, snippet) VALUES (?, ?, ?)",
+            rows,
+        )
+        self._conn.commit()
+
+    # ---------- internal ----------
+
+    def _open_inmem(self) -> None:
+        # check_same_thread=False: the MCP server runs query handlers via
+        # asyncio.to_thread, which uses a thread pool. Without this flag, a
+        # connection opened on the main thread cannot be used from worker
+        # threads (sqlite3.ProgrammingError). SQLite's library is built in
+        # serialized threading mode by default, so a single connection is
+        # safe across threads as long as we don't have concurrent writes —
+        # which we don't (writes happen at indexer.run() time, queries are
+        # read-only). Mirrors the same fix in keyword_index_sqlite.py.
+        self._conn = sqlite3.connect(":memory:", check_same_thread=False)
+        self._init_schema()
+
+    def _init_schema(self) -> None:
+        assert self._conn is not None
+        self._conn.executescript(
+            f"""
+            CREATE TABLE IF NOT EXISTS {_DEFS_TABLE} (
+                name TEXT NOT NULL,
+                path TEXT NOT NULL,
+                line_start INTEGER NOT NULL,
+                line_end INTEGER NOT NULL,
+                kind TEXT NOT NULL,
+                language TEXT NOT NULL
+            );
+            CREATE INDEX IF NOT EXISTS idx_{_DEFS_TABLE}_name ON {_DEFS_TABLE}(name);
+            CREATE INDEX IF NOT EXISTS idx_{_DEFS_TABLE}_name_lang ON {_DEFS_TABLE}(name, language);
+
+            CREATE VIRTUAL TABLE IF NOT EXISTS {_REFS_TABLE} USING fts5(
+                path UNINDEXED, line UNINDEXED, snippet,
+                tokenize='unicode61 remove_diacritics 2'
+            );
+            """
+        )
+
+
+def _sanitise(query: str) -> str:
+    """Strip FTS5 syntax so user input is bare tokens only. See
+    keyword_index_sqlite._sanitise for the rationale (Sprint 8 fix
+    for the punctuation-crashes-FTS5-parser bug)."""
+    cleaned = _FTS_KEEP_RE.sub(" ", query)
+    cleaned = _FTS_BOOLEAN_RE.sub(" ", cleaned)
+    return " ".join(cleaned.split())
code_context/adapters/driven/vector_store_numpy.py
@@ -0,0 +1,119 @@
+"""NumPyParquetStore — brute-force cosine on a NumPy array."""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.parquet as pq
+
+from code_context.domain.models import Chunk, IndexEntry
+
+
+class NumPyParquetStore:
+    """In-memory vectors + chunk metadata, persistable to disk."""
+
+    _VECTORS_FILE = "vectors.npy"
+    _CHUNKS_FILE = "chunks.parquet"
+
+    def __init__(self) -> None:
+        self._vectors: np.ndarray | None = None  # (n, d) float32
+        self._chunks: list[Chunk] = []
+
+    def add(self, entries: Iterable[IndexEntry]) -> None:
+        new_vecs: list[np.ndarray] = []
+        for entry in entries:
+            new_vecs.append(entry.vector)
+            self._chunks.append(entry.chunk)
+        if not new_vecs:
+            return
+        stacked = np.stack(new_vecs).astype(np.float32, copy=False)
+        if self._vectors is None:
+            self._vectors = stacked
+        else:
+            self._vectors = np.concatenate([self._vectors, stacked], axis=0)
+
+    def delete_by_path(self, path: str) -> int:
+        """Remove every chunk whose path == `path`. Returns the row count
+        removed (0 if nothing matched). Rebuilds `_vectors` via boolean
+        masking; if the deletion empties the store, `_vectors` resets to
+        None so subsequent `search` short-circuits on the empty-store
+        branch (matches the post-`__init__` invariant)."""
+        if self._vectors is None or not self._chunks:
+            return 0
+        keep = [c.path != path for c in self._chunks]
+        n_removed = sum(1 for k in keep if not k)
+        if n_removed == 0:
+            return 0
+        self._vectors = self._vectors[keep]
+        self._chunks = [c for c, k in zip(self._chunks, keep, strict=True) if k]
+        if self._vectors.shape[0] == 0:
+            self._vectors = None
+        return n_removed
+
+    def search(self, query: np.ndarray, k: int) -> list[tuple[IndexEntry, float]]:
+        if self._vectors is None or self._vectors.shape[0] == 0:
+            return []
+        q = query.astype(np.float32, copy=False)
+        # Normalize query and corpus.
+        q_norm = q / (np.linalg.norm(q) or 1.0)
+        v_norms = np.linalg.norm(self._vectors, axis=1, keepdims=True)
+        v_norms[v_norms == 0] = 1.0
+        normalized = self._vectors / v_norms
+        scores = normalized @ q_norm  # (n,)
+        k = min(k, scores.shape[0])
+        # argpartition + sort just the top-k for performance.
+        if k <= 0:
+            return []
+        top_idx = np.argpartition(-scores, kth=k - 1)[:k]
+        top_idx = top_idx[np.argsort(-scores[top_idx])]
+        return [
+            (IndexEntry(chunk=self._chunks[i], vector=self._vectors[i]), float(scores[i]))
+            for i in top_idx
+        ]
+
+    def persist(self, path: Path) -> None:
+        path.mkdir(parents=True, exist_ok=True)
+        if self._vectors is None:
+            np.save(path / self._VECTORS_FILE, np.empty((0, 1), dtype=np.float32))
+        else:
+            np.save(path / self._VECTORS_FILE, self._vectors)
+        table = pa.table(
+            {
+                "path": [c.path for c in self._chunks],
+                "line_start": [c.line_start for c in self._chunks],
+                "line_end": [c.line_end for c in self._chunks],
+                "content_hash": [c.content_hash for c in self._chunks],
+                "snippet": [c.snippet for c in self._chunks],
+            }
+        )
+        pq.write_table(table, path / self._CHUNKS_FILE)
+
+    def load(self, path: Path) -> None:
+        vectors_path = path / self._VECTORS_FILE
+        chunks_path = path / self._CHUNKS_FILE
+        if not vectors_path.exists() or not chunks_path.exists():
+            raise FileNotFoundError(f"index files missing in {path}")
+        self._vectors = np.load(vectors_path).astype(np.float32, copy=False)
+        if self._vectors.shape == (0, 1):
+            self._vectors = None
+        table = pq.read_table(chunks_path)
+        self._chunks = [
+            Chunk(
+                path=p,
+                line_start=ls,
+                line_end=le,
+                content_hash=ch,
+                snippet=sn,
+            )
+            for p, ls, le, ch, sn in zip(
+                table["path"].to_pylist(),
+                table["line_start"].to_pylist(),
+                table["line_end"].to_pylist(),
+                table["content_hash"].to_pylist(),
+                table["snippet"].to_pylist(),
+                strict=True,
+            )
+        ]
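
And a matching sketch for the vector store (again not part of the package; the Chunk/IndexEntry field names are taken from this file's own constructor calls, and the vectors/paths are invented):

from pathlib import Path

import numpy as np

from code_context.adapters.driven.vector_store_numpy import NumPyParquetStore
from code_context.domain.models import Chunk, IndexEntry

store = NumPyParquetStore()
chunk = Chunk(path="pkg/io.py", line_start=1, line_end=12,
              content_hash="abc123", snippet="def parse(raw): ...")
store.add([IndexEntry(chunk=chunk, vector=np.array([0.1, 0.9, 0.0]))])

# Brute-force cosine over all rows; scores fall in [-1, 1], best match first.
for entry, score in store.search(np.array([0.0, 1.0, 0.0]), k=5):
    print(entry.chunk.path, f"{score:.3f}")      # pkg/io.py 0.994

store.persist(Path("/tmp/idx"))                  # vectors.npy + chunks.parquet
assert store.delete_by_path("pkg/io.py") == 1    # store empties; _vectors -> None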