code-context-mcp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_context/__init__.py +3 -0
- code_context/_background.py +93 -0
- code_context/_composition.py +425 -0
- code_context/_watcher.py +89 -0
- code_context/adapters/__init__.py +0 -0
- code_context/adapters/driven/__init__.py +0 -0
- code_context/adapters/driven/chunker_dispatcher.py +43 -0
- code_context/adapters/driven/chunker_line.py +54 -0
- code_context/adapters/driven/chunker_treesitter.py +215 -0
- code_context/adapters/driven/chunker_treesitter_queries.py +111 -0
- code_context/adapters/driven/code_source_fs.py +122 -0
- code_context/adapters/driven/embeddings_local.py +111 -0
- code_context/adapters/driven/embeddings_openai.py +58 -0
- code_context/adapters/driven/git_source_cli.py +211 -0
- code_context/adapters/driven/introspector_fs.py +224 -0
- code_context/adapters/driven/keyword_index_sqlite.py +206 -0
- code_context/adapters/driven/reranker_crossencoder.py +61 -0
- code_context/adapters/driven/symbol_index_sqlite.py +264 -0
- code_context/adapters/driven/vector_store_numpy.py +119 -0
- code_context/adapters/driving/__init__.py +0 -0
- code_context/adapters/driving/mcp_server.py +365 -0
- code_context/cli.py +161 -0
- code_context/config.py +114 -0
- code_context/domain/__init__.py +0 -0
- code_context/domain/index_bus.py +52 -0
- code_context/domain/models.py +140 -0
- code_context/domain/ports.py +205 -0
- code_context/domain/use_cases/__init__.py +0 -0
- code_context/domain/use_cases/explain_diff.py +98 -0
- code_context/domain/use_cases/find_definition.py +30 -0
- code_context/domain/use_cases/find_references.py +22 -0
- code_context/domain/use_cases/get_file_tree.py +36 -0
- code_context/domain/use_cases/get_summary.py +24 -0
- code_context/domain/use_cases/indexer.py +336 -0
- code_context/domain/use_cases/recent_changes.py +36 -0
- code_context/domain/use_cases/search_repo.py +131 -0
- code_context/server.py +151 -0
- code_context_mcp-1.0.0.dist-info/METADATA +181 -0
- code_context_mcp-1.0.0.dist-info/RECORD +43 -0
- code_context_mcp-1.0.0.dist-info/WHEEL +5 -0
- code_context_mcp-1.0.0.dist-info/entry_points.txt +3 -0
- code_context_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
- code_context_mcp-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""IndexUpdateBus — minimal threadsafe pub-sub for index-swap events.
|
|
2
|
+
|
|
3
|
+
Sprint 7's background indexer runs reindex on a daemon thread and
|
|
4
|
+
publishes a "swap" notification to this bus when a fresh index dir
|
|
5
|
+
becomes the active one. Search use cases consult `generation` to
|
|
6
|
+
short-circuit the no-op path with an int compare; on detected drift,
|
|
7
|
+
they reload their store handles from the active index dir before
|
|
8
|
+
serving the next query.
|
|
9
|
+
|
|
10
|
+
Pure domain — no I/O. Thread safety: a single `Lock` guards
|
|
11
|
+
`generation` and `subscribers`. Subscriber callbacks fire OUTSIDE the
|
|
12
|
+
lock (so a misbehaving subscriber can't deadlock the publisher); a
|
|
13
|
+
bad subscriber raising an exception is logged-and-swallowed so the
|
|
14
|
+
publisher's contract (monotonic generation, no lost events for
|
|
15
|
+
well-behaved subscribers) holds.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import threading
|
|
22
|
+
from collections.abc import Callable
|
|
23
|
+
|
|
24
|
+
log = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class IndexUpdateBus:
    """Minimal threadsafe pub-sub for index-swap notifications.

    `publish_swap` bumps a monotonic generation counter and then calls every
    subscriber with the new active index directory. Search use cases compare
    `generation` (cheap int check) to decide whether to reload their store
    handles. Callbacks always run OUTSIDE the internal lock so a slow or
    misbehaving subscriber cannot stall or deadlock a publisher; a subscriber
    that raises is logged and skipped, preserving delivery to the rest.
    """

    def __init__(self) -> None:
        # Single lock guards both the counter and the subscriber list.
        self._lock = threading.Lock()
        self._gen = 0
        self._subs: list[Callable[[str], None]] = []

    @property
    def generation(self) -> int:
        """Current swap generation; starts at 0, bumped once per publish."""
        with self._lock:
            return self._gen

    def subscribe(self, fn: Callable[[str], None]) -> None:
        """Register `fn` to receive the new index dir on every future swap."""
        with self._lock:
            self._subs.append(fn)

    def publish_swap(self, new_index_dir: str) -> None:
        """Advance the generation and notify all subscribers of the swap."""
        with self._lock:
            self._gen += 1
            snapshot = list(self._subs)
        # Deliver outside the lock — a slow subscriber must not block
        # other publishers (or deadlock one that re-enters the bus).
        for callback in snapshot:
            try:
                callback(new_index_dir)
            except Exception:  # noqa: BLE001 - subscriber bug must not break publisher
                log.exception("IndexUpdateBus subscriber raised; continuing")
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Domain models. Pure data; no I/O.
|
|
2
|
+
|
|
3
|
+
These dataclasses are the boundary types of the application. The 3 contract
|
|
4
|
+
return types (SearchResult, Change, ProjectSummary) match docs/tool-protocol.md
|
|
5
|
+
in context-template byte-for-byte at the field level.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True, slots=True)
class Chunk:
    """A piece of code (text fragment) ready to embed."""

    path: str  # source file path (presumably repo-relative — confirm against chunker callers)
    line_start: int  # first line of the snippet in the source file
    line_end: int  # last line of the snippet in the source file
    content_hash: str  # sha256 of snippet, hex string
    snippet: str  # the raw text of the chunk
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True, slots=True)
class IndexEntry:
    """A chunk plus its embedding vector. Lives in the vector store."""

    chunk: Chunk
    vector: np.ndarray  # shape: (dimension,), dtype float32
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True, slots=True)
class SearchResult:
    """Result of search_repo. Matches tool-protocol.md SearchResult."""

    path: str
    lines: tuple[int, int]  # (start_line, end_line) of the snippet
    snippet: str
    score: float  # relevance score; stores return results in descending score order
    why: str  # human-readable explanation of why this result matched
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True, slots=True)
class Change:
    """Result of recent_changes. Matches tool-protocol.md Change."""

    sha: str  # commit hash
    date: datetime
    author: str
    paths: list[str]  # files touched by the commit
    summary: str  # commit message summary line
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass(frozen=True, slots=True)
class ProjectSummary:
    """Result of get_summary. Matches tool-protocol.md ProjectSummary."""

    name: str
    purpose: str
    stack: list[str]  # languages/frameworks detected
    entry_points: list[str]
    key_modules: list[dict[str, str]] = field(default_factory=list)
    stats: dict[str, Any] = field(default_factory=dict)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(frozen=True, slots=True)
class SymbolDef:
    """Result of find_definition. Matches tool-protocol.md SymbolDef (v1.1)."""

    name: str
    path: str
    lines: tuple[int, int]  # (start_line, end_line) of the definition
    kind: str  # "function" | "class" | "method" | "type" | "enum" | "interface" | "struct" | ...
    language: str  # "python" | "javascript" | "typescript" | "go" | "rust" | "csharp"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass(frozen=True, slots=True)
class SymbolRef:
    """Result of find_references. Matches tool-protocol.md SymbolRef (v1.1)."""

    path: str
    line: int  # line number where the reference occurs
    snippet: str  # the text of that line
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass(frozen=True, slots=True)
class FileTreeNode:
    """Result of get_file_tree. Matches tool-protocol.md FileTreeNode (v1.2)."""

    path: str
    kind: str  # "file" | "dir"
    children: tuple[FileTreeNode, ...] = ()  # empty for files and for dirs cut off by max_depth
    size: int | None = None  # bytes; None for dirs
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass(frozen=True, slots=True)
class DiffFile:
    """Per-file diff hunks returned by GitSource.diff_files (v1.2 internal type)."""

    path: str
    hunks: tuple[tuple[int, int], ...]  # list of (start_line, end_line) in the new file
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True, slots=True)
class DiffChunk:
    """Result of explain_diff. Matches tool-protocol.md DiffChunk (v1.2)."""

    path: str
    lines: tuple[int, int]  # (start_line, end_line) in the new file version
    snippet: str  # chunk text; may be empty when the file could not be read
    kind: str  # "function" | "class" | "method" | ... | "fragment"
    change: str  # "added" | "modified" | "deleted"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass(frozen=True, slots=True)
class StaleSet:
    """Per-file staleness verdict driving incremental reindex (Sprint 6).

    `full_reindex_required` is the authoritative "blow it all away" flag —
    set on first run (no current index), or when a global invalidator
    changed (embeddings model id, chunker version, keyword/symbol index
    versions, metadata schema upgrade). When True, the file lists are
    advisory only; callers should ignore them and run a full reindex.

    Otherwise, `dirty_files` are absolute paths that need re-chunking +
    re-embedding (content hash drift); `deleted_files` are repo-relative
    paths that vanished since last index and whose rows must be purged
    from every store. An all-empty StaleSet with full_reindex_required=
    False is the steady-state "no work" signal.
    """

    full_reindex_required: bool  # True => ignore the file lists, rebuild everything
    reason: str  # human-readable summary for logs / `code-context status`
    dirty_files: tuple[Path, ...] = ()  # absolute paths to re-chunk + re-embed
    deleted_files: tuple[str, ...] = ()  # repo-relative paths to purge from every store
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Driven ports — interfaces that the domain calls.
|
|
2
|
+
|
|
3
|
+
Each port is a Protocol (PEP 544 structural typing). Adapters implement them
|
|
4
|
+
duck-style; no inheritance required. Tests mock by writing a class that has
|
|
5
|
+
the same methods.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Iterable
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Protocol
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from code_context.domain.models import (
|
|
18
|
+
Change,
|
|
19
|
+
Chunk,
|
|
20
|
+
DiffFile,
|
|
21
|
+
FileTreeNode,
|
|
22
|
+
IndexEntry,
|
|
23
|
+
ProjectSummary,
|
|
24
|
+
SymbolDef,
|
|
25
|
+
SymbolRef,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class EmbeddingsProvider(Protocol):
    """Embeds text. Default: LocalST (sentence-transformers)."""

    @property
    def dimension(self) -> int:
        """Length of the embedding vectors produced by `embed`."""
        ...

    @property
    def model_id(self) -> str:
        """Identifier including library version, used for staleness detection."""

    def embed(self, texts: list[str]) -> np.ndarray:
        """Returns shape (len(texts), dimension), dtype float32."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class VectorStore(Protocol):
    """Persistent vector store. Default: NumPyParquetStore."""

    def add(self, entries: Iterable[IndexEntry]) -> None:
        """Add entries to the store."""
        ...

    def search(self, query: np.ndarray, k: int) -> list[tuple[IndexEntry, float]]:
        """Returns top-k entries with cosine similarity scores, descending."""

    def delete_by_path(self, path: str) -> int:
        """Remove every entry whose chunk.path == `path`. Returns the row
        count removed. Used by incremental reindex (Sprint 6) to purge a
        file's chunks before re-adding fresh ones."""

    def persist(self, path: Path) -> None:
        """Writes vectors.npy + chunks.parquet under path/."""

    def load(self, path: Path) -> None:
        """Loads from path/."""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Chunker(Protocol):
    """Splits source code text into chunks. Default: LineChunker."""

    @property
    def version(self) -> str:
        """Identifier for staleness detection."""

    def chunk(self, content: str, path: str) -> list[Chunk]:
        """Split `content` (the text of the file at `path`) into chunks."""
        ...
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class CodeSource(Protocol):
    """Lists and reads source files. Default: FilesystemSource."""

    def list_files(self, root: Path, include_exts: list[str], max_bytes: int) -> list[Path]:
        """List indexable files under `root`, filtered by extension and size cap."""
        ...

    def read(self, path: Path) -> str:
        """Read the file as text; may raise OSError/UnicodeDecodeError
        (callers such as ExplainDiffUseCase catch both)."""
        ...

    def walk_tree(
        self,
        root: Path,
        max_depth: int = 4,
        include_hidden: bool = False,
        subpath: Path | None = None,
    ) -> FileTreeNode:
        """Walk the filesystem rooted at `root` (or `root/subpath` if given)
        and return a hierarchical FileTreeNode. Honors .gitignore. Skips
        binary files. Caps recursion at `max_depth`."""
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class GitSource(Protocol):
    """Reads git state. Default: GitCliSource."""

    def is_repo(self, root: Path) -> bool:
        """True when `root` lives inside a git repository."""
        ...

    def head_sha(self, root: Path) -> str:
        """Empty string if not a repo."""

    def commits(
        self,
        root: Path,
        since: datetime | None = None,
        paths: list[str] | None = None,
        max_count: int = 20,
    ) -> list[Change]:
        """Recent commits, optionally filtered by date and touched paths."""
        ...

    def diff_files(self, root: Path, ref: str) -> list[DiffFile]:
        """Return per-file diff hunks for the commit at `ref` (or worktree
        diff against HEAD if ref=='HEAD' is given the current behavior).
        Each DiffFile.hunks is a tuple of (start_line, end_line) ranges in
        the *new* version of the file (post-commit). Empty list if not a
        repo."""
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ProjectIntrospector(Protocol):
    """Builds a ProjectSummary. Default: FilesystemIntrospector."""

    def summary(
        self, root: Path, scope: str = "project", path: Path | None = None
    ) -> ProjectSummary:
        """Summarize the project at `root`; `scope`/`path` narrow the view
        (path resolution against root is done by GetSummaryUseCase)."""
        ...
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class KeywordIndex(Protocol):
    """Keyword-based index for exact-identifier search. Default: SqliteFTS5Index."""

    @property
    def version(self) -> str:
        """Identifier for staleness detection."""

    def add(self, entries: Iterable[IndexEntry]) -> None:
        """Add entries to the index."""
        ...

    def search(self, query: str, k: int) -> list[tuple[IndexEntry, float]]:
        """Returns top-k entries with BM25-style scores, descending."""

    def delete_by_path(self, path: str) -> int:
        """Remove every row whose path == `path`. Returns the row count
        removed. Used by incremental reindex (Sprint 6)."""

    def persist(self, path: Path) -> None:
        """Write the index under path/."""
        ...

    def load(self, path: Path) -> None:
        """Load the index from path/."""
        ...
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class Reranker(Protocol):
    """Re-orders search candidates with a more accurate model. Optional."""

    @property
    def version(self) -> str:
        """Identifier for staleness detection."""
        ...

    @property
    def model_id(self) -> str:
        """Model identifier (used for diagnostics/staleness — confirm against adapter)."""
        ...

    def rerank(
        self,
        query: str,
        candidates: list[tuple[IndexEntry, float]],
        k: int,
    ) -> list[tuple[IndexEntry, float]]:
        """Returns the top-k candidates re-scored by the reranker, descending."""
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class SymbolIndex(Protocol):
    """Index of named symbols (definitions + textual references).

    Definitions come from the chunker's AST extraction (see
    TreeSitterChunker.extract_definitions in v0.5.0). References are derived
    from the keyword index's snippet text — they share an on-disk file in
    the default SQLite-backed adapter to avoid duplicate I/O.
    """

    @property
    def version(self) -> str:
        """Identifier for staleness detection."""

    def add_definitions(self, defs: Iterable[SymbolDef]) -> None:
        """Bulk-insert definition rows."""
        ...

    def add_references(self, refs: Iterable[tuple[str, int, str]]) -> None:
        """Bulk-insert reference rows: (path, line, snippet) triples.

        Snippet text is full-text-indexed; path and line are stored verbatim.
        IndexerUseCase feeds chunks here so find_references has rows to match
        against. Adapters that don't track references (e.g., a null adapter)
        may no-op.
        """

    def find_definition(
        self,
        name: str,
        language: str | None = None,
        max_count: int = 5,
    ) -> list[SymbolDef]:
        """Returns symbol definitions matching `name`, optionally filtered by language."""

    def find_references(self, name: str, max_count: int = 50) -> list[SymbolRef]:
        """Returns lines mentioning `name` as a whole-word match (no `log` → `logger`)."""

    def delete_by_path(self, path: str) -> int:
        """Remove every definition AND reference row whose path == `path`.
        Returns the total row count removed across both tables. Used by
        incremental reindex (Sprint 6)."""

    def persist(self, path: Path) -> None:
        """Write the index under path/."""
        ...

    def load(self, path: Path) -> None:
        """Load the index from path/."""
        ...
|
|
File without changes
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""ExplainDiffUseCase — combines GitSource.diff_files with the chunker.
|
|
2
|
+
|
|
3
|
+
For each diff hunk in `ref`, find the AST-aligned chunk that contains
|
|
4
|
+
the affected lines. If the chunker produced no chunks for a file (e.g.
|
|
5
|
+
it's a binary file or an unsupported language), emit a "fragment" chunk
|
|
6
|
+
with the raw line range — caller can still see WHAT changed even if
|
|
7
|
+
not at AST granularity.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from code_context.domain.models import DiffChunk
|
|
16
|
+
from code_context.domain.ports import Chunker, CodeSource, GitSource
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ExplainDiffUseCase:
    """Use case for the explain_diff MCP tool.

    Combines GitSource.diff_files with the chunker: for each diff hunk in
    `ref`, emit the AST-aligned chunk(s) containing the affected lines. When
    the chunker has nothing for a file (binary, unreadable, unsupported
    language) or a hunk falls between chunks (e.g. top-of-file imports),
    fall back to a "fragment" chunk with the raw line range — the caller can
    still see WHAT changed even without AST granularity.
    """

    chunker: Chunker
    code_source: CodeSource
    git_source: GitSource
    repo_root: Path

    def run(self, ref: str, max_chunks: int = 50) -> list[DiffChunk]:
        """Return at most `max_chunks` DiffChunks for the diff at `ref`.

        Results are deduplicated on (path, line_start, line_end) since
        neighbouring hunks can map to the same AST chunk. `change` is
        always "modified" and AST chunks report kind "function": the
        Chunker port does not expose node-level kind or add/delete status
        (v0.8 follow-up). Fix vs. prior version: every return path now
        truncates to `max_chunks` (the old early return after the
        overlapping-chunk loop could exceed it).
        """
        results: list[DiffChunk] = []
        seen: set[tuple[str, int, int]] = set()  # (path, line_start, line_end)

        def _emit(path: str, lines: tuple[int, int], snippet: str, kind: str) -> None:
            # Deduplicated append; shared by all three emission sites.
            key = (path, lines[0], lines[1])
            if key in seen:
                return
            seen.add(key)
            results.append(
                DiffChunk(path=path, lines=lines, snippet=snippet, kind=kind, change="modified")
            )

        for diff_file in self.git_source.diff_files(self.repo_root, ref):
            try:
                content = self.code_source.read(self.repo_root / diff_file.path)
            except (OSError, UnicodeDecodeError):
                # Likely binary or deleted in HEAD. Emit raw-line fragments
                # with empty snippets.
                for hunk_start, hunk_end in diff_file.hunks:
                    _emit(diff_file.path, (hunk_start, hunk_end), "", "fragment")
                    if len(results) >= max_chunks:
                        return results[:max_chunks]
                continue

            chunks = self.chunker.chunk(content, diff_file.path)
            content_lines = content.splitlines()  # hoisted: was recomputed per hunk
            for hunk_start, hunk_end in diff_file.hunks:
                # AST chunks whose line range overlaps the hunk.
                overlapping = [
                    c for c in chunks if c.line_start <= hunk_end and c.line_end >= hunk_start
                ]
                if overlapping:
                    for chunk in overlapping:
                        _emit(
                            diff_file.path,
                            (chunk.line_start, chunk.line_end),
                            chunk.snippet,
                            "function",
                        )
                else:
                    # Hunk fell between chunks; emit the raw line range.
                    _emit(
                        diff_file.path,
                        (hunk_start, hunk_end),
                        "\n".join(content_lines[hunk_start - 1 : hunk_end]),
                        "fragment",
                    )
                if len(results) >= max_chunks:
                    return results[:max_chunks]

        return results[:max_chunks]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""FindDefinitionUseCase — delegates to SymbolIndex."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from code_context.domain.models import SymbolDef
|
|
8
|
+
from code_context.domain.ports import SymbolIndex
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class FindDefinitionUseCase:
    """Use case behind the find_definition MCP tool.

    A thin pass-through to SymbolIndex.find_definition: ranking, language
    filtering, and max-count semantics all live in the adapter. The layer
    exists only so the MCP driving adapter stays free of port-specific
    knowledge (same pattern as RecentChangesUseCase / GetSummaryUseCase).
    """

    symbol_index: SymbolIndex

    def run(
        self,
        name: str,
        language: str | None = None,
        max_count: int = 5,
    ) -> list[SymbolDef]:
        index = self.symbol_index
        return index.find_definition(name, language=language, max_count=max_count)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""FindReferencesUseCase — delegates to SymbolIndex."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from code_context.domain.models import SymbolRef
|
|
8
|
+
from code_context.domain.ports import SymbolIndex
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class FindReferencesUseCase:
    """Use case behind the find_references MCP tool.

    A thin pass-through to SymbolIndex.find_references; word-boundary
    matching and result ordering are the adapter's responsibility.
    """

    symbol_index: SymbolIndex

    def run(self, name: str, max_count: int = 50) -> list[SymbolRef]:
        index = self.symbol_index
        return index.find_references(name, max_count=max_count)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""GetFileTreeUseCase — delegates to CodeSource."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from code_context.domain.models import FileTreeNode
|
|
9
|
+
from code_context.domain.ports import CodeSource
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class GetFileTreeUseCase:
    """Use case behind the get_file_tree MCP tool.

    A thin pass-through to CodeSource.walk_tree; the MCP server is what
    flattens the resulting FileTreeNode tree into JSON. This layer's only
    job is converting the incoming string path into a Path.
    """

    code_source: CodeSource
    repo_root: Path

    def run(
        self,
        path: str | None = None,
        max_depth: int = 4,
        include_hidden: bool = False,
    ) -> FileTreeNode:
        walk_args = {
            "max_depth": max_depth,
            "include_hidden": include_hidden,
            # Empty/None path means "walk from the repo root".
            "subpath": Path(path) if path else None,
        }
        return self.code_source.walk_tree(self.repo_root, **walk_args)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""GetSummaryUseCase — delegates to ProjectIntrospector."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from code_context.domain.models import ProjectSummary
|
|
9
|
+
from code_context.domain.ports import ProjectIntrospector
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class GetSummaryUseCase:
    """Use case behind the get_summary MCP tool."""

    introspector: ProjectIntrospector
    repo_root: Path

    def run(self, scope: str = "project", path: Path | None = None) -> ProjectSummary:
        """Build a ProjectSummary for `scope`, anchored at repo_root.

        The MCP `path` argument is documented as repo-relative, so a
        relative `path` is resolved against repo_root here — keeping the
        introspector path-agnostic and making callers from any CWD (the
        smoke harness, the CLI, MCP) behave identically. Absolute paths
        pass through unchanged.
        """
        target = path
        if target is not None and not target.is_absolute():
            target = self.repo_root / target
        return self.introspector.summary(self.repo_root, scope=scope, path=target)
|