PyPI - codespine - Versions diffs - 0.9.7__tar.gz → 0.9.9__tar.gz - Mend

codespine 0.9.7__tar.gz → 0.9.9__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

{codespine-0.9.7 → codespine-0.9.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.9.7
+Version: 0.9.9
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License
@@ -55,11 +55,14 @@ Requires-Dist: numpy; extra == "ml"
 Provides-Extra: community
 Requires-Dist: igraph; extra == "community"
 Requires-Dist: leidenalg; extra == "community"
+Provides-Extra: duckdb
+Requires-Dist: duckdb>=0.10.0; extra == "duckdb"
 Provides-Extra: full
 Requires-Dist: sentence-transformers; extra == "full"
 Requires-Dist: numpy; extra == "full"
 Requires-Dist: igraph; extra == "full"
 Requires-Dist: leidenalg; extra == "full"
+Requires-Dist: duckdb>=0.10.0; extra == "full"
 Dynamic: license-file
 # CodeSpine

{codespine-0.9.7 → codespine-0.9.9}/codespine/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """CodeSpine package."""
 __all__ = ["__version__"]
-__version__ = "0.9.7"
+__version__ = "0.9.9"

codespine-0.9.9/codespine/cache/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""Result caching for MCP tools."""
+from codespine.cache.result_cache import ResultCache
+__all__ = ["ResultCache"]

codespine-0.9.9/codespine/cache/result_cache.py ADDED Viewed

@@ -0,0 +1,167 @@
+"""LRU result cache for CodeSpine MCP tools.
+Avoids recomputing expensive analyses (impact BFS, dead-code scan, community
+lookup) when the same arguments are passed and the underlying index hasn't
+changed since the last call.
+Cache key: ``(tool_name, args_hash, snapshot_mtime_rounded)``
+  - ``tool_name`` — the MCP tool that produced the result
+  - ``args_hash`` — SHA-1 of the JSON-serialised arguments (sorted keys)
+  - ``snapshot_mtime_rounded`` — read-replica mtime rounded to 1 s, so a new
+    snapshot invalidates all cached results for the affected store
+TTL: entries are evicted after ``ttl_s`` seconds (default 300 s / 5 min) even
+if the cache isn't full, preventing stale results across long sessions.
+Usage
+-----
+    from codespine.cache.result_cache import ResultCache
+    _cache = ResultCache(maxsize=256, ttl_s=300.0)
+    key = _cache.make_key("get_impact", {"symbol": "Foo", "project": "myapp"}, mtime)
+    cached = _cache.get(key)
+    if cached is not None:
+        return cached
+    result = expensive_computation(...)
+    _cache.put(key, result)
+    return result
+"""
+from __future__ import annotations
+import hashlib
+import json
+import threading
+import time
+from collections import OrderedDict
+from typing import Any
+class ResultCache:
+    """Thread-safe LRU cache for pre-serialised JSON tool results.
+    Parameters
+    ----------
+    maxsize:
+        Maximum number of entries to keep.  Oldest entry is evicted when
+        the cache is full (LRU eviction).
+    ttl_s:
+        Time-to-live in seconds.  Entries older than this are treated as
+        missing even if they're still in the cache.
+    """
+    def __init__(self, maxsize: int = 256, ttl_s: float = 300.0) -> None:
+        self._maxsize = maxsize
+        self._ttl = ttl_s
+        # OrderedDict preserves insertion order: oldest → newest
+        self._cache: OrderedDict[tuple, tuple[str, float]] = OrderedDict()
+        self._lock = threading.Lock()
+        self._hits = 0
+        self._misses = 0
+    # ------------------------------------------------------------------
+    # Key construction
+    # ------------------------------------------------------------------
+    @staticmethod
+    def make_key(
+        tool_name: str,
+        args: dict[str, Any],
+        snapshot_mtime: float,
+    ) -> tuple:
+        """Build a cache key from tool name, arguments, and index timestamp.
+        Parameters
+        ----------
+        tool_name:
+            Name of the MCP tool (e.g. ``"get_impact"``).
+        args:
+            Tool arguments dict (``None`` values included so missing optional
+            args don't collide with explicitly-set ones).
+        snapshot_mtime:
+            Last-modified time of the read-replica sentinel file, rounded to
+            1-second precision.  A new snapshot invalidates old entries.
+        """
+        try:
+            args_bytes = json.dumps(args, sort_keys=True, default=str).encode()
+        except Exception:
+            args_bytes = str(args).encode()
+        args_hash = hashlib.sha1(args_bytes).hexdigest()[:16]
+        return (tool_name, args_hash, round(snapshot_mtime, 0))
+    # ------------------------------------------------------------------
+    # Cache operations
+    # ------------------------------------------------------------------
+    def get(self, key: tuple) -> str | None:
+        """Return the cached value for *key*, or ``None`` if missing/expired."""
+        with self._lock:
+            if key not in self._cache:
+                self._misses += 1
+                return None
+            value, inserted_at = self._cache[key]
+            if time.monotonic() - inserted_at > self._ttl:
+                del self._cache[key]
+                self._misses += 1
+                return None
+            # Promote to most-recently-used position.
+            self._cache.move_to_end(key)
+            self._hits += 1
+            return value
+    def put(self, key: tuple, value: str) -> None:
+        """Store *value* under *key*.  Evicts LRU entry if cache is full."""
+        with self._lock:
+            if key in self._cache:
+                self._cache.move_to_end(key)
+            self._cache[key] = (value, time.monotonic())
+            # Evict oldest entries until we're within maxsize.
+            while len(self._cache) > self._maxsize:
+                self._cache.popitem(last=False)
+    def invalidate(self) -> int:
+        """Clear the entire cache.  Call after any index mutation.
+        Returns the number of entries evicted.
+        """
+        with self._lock:
+            n = len(self._cache)
+            self._cache.clear()
+            return n
+    def invalidate_tool(self, tool_name: str) -> int:
+        """Evict all entries for a specific tool.
+        Returns the number of entries removed.
+        """
+        with self._lock:
+            keys_to_remove = [k for k in self._cache if k[0] == tool_name]
+            for k in keys_to_remove:
+                del self._cache[k]
+            return len(keys_to_remove)
+    # ------------------------------------------------------------------
+    # Stats / introspection
+    # ------------------------------------------------------------------
+    def stats(self) -> dict[str, Any]:
+        """Return cache statistics (size, hit/miss counts, hit rate)."""
+        with self._lock:
+            total = self._hits + self._misses
+            return {
+                "size": len(self._cache),
+                "maxsize": self._maxsize,
+                "ttl_s": self._ttl,
+                "hits": self._hits,
+                "misses": self._misses,
+                "hit_rate": round(self._hits / total, 3) if total else 0.0,
+            }
+    def __repr__(self) -> str:  # pragma: no cover
+        s = self.stats()
+        return (
+            f"ResultCache(size={s['size']}/{s['maxsize']}, "
+            f"hits={s['hits']}, misses={s['misses']}, "
+            f"hit_rate={s['hit_rate']:.1%})"
+        )

{codespine-0.9.7 → codespine-0.9.9}/codespine/cli.py RENAMED Viewed

@@ -293,7 +293,13 @@ def main() -> None:
 @main.command()
 @click.argument("path", type=click.Path(exists=True))
 @click.option("--full/--incremental", default=False, show_default=True)
-@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
+@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses (auto-on for repos ≤3 k files).")
+@click.option(
+    "--incremental-deep",
+    is_flag=True,
+    default=False,
+    help="Force deep analysis even during incremental re-index. Useful after large refactors.",
+)
 @click.option(
     "--embed/--no-embed",
     default=True,
@@ -301,7 +307,7 @@ def main() -> None:
     help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
 )
 @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
-def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool) -> None:
+def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bool, allow_running: bool) -> None:
     """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
     Embeddings are generated by default. If sentence-transformers is installed
@@ -459,7 +465,7 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
     dead: list[dict] = []
     coupling_pairs: list[dict] = []
-    should_run_deep = deep or total_files_found <= 1200
+    should_run_deep = deep or incremental_deep or total_files_found <= 3000
     if should_run_deep:
         comm_label = "Detecting communities..."
         _live_phase(comm_label, "running")
@@ -1138,6 +1144,36 @@ def stop() -> None:
             os.remove(SETTINGS.pid_file)
+@main.command("install-model")
+def install_model() -> None:
+    """Download and cache the sentence-transformers embedding model.
+    Requires 'pip install codespine[ml]'. The model is downloaded once and
+    cached locally; subsequent analyse runs use the cache without network access.
+    """
+    try:
+        from sentence_transformers import SentenceTransformer  # noqa: F401
+    except ImportError:
+        click.secho(
+            "sentence-transformers is not installed.\n"
+            "Run: pip install codespine[ml]",
+            fg="red",
+        )
+        return
+    model_name = SETTINGS.embedding_model
+    click.secho(f"Downloading model '{model_name}' …", fg="cyan")
+    try:
+        from sentence_transformers import SentenceTransformer
+        model = SentenceTransformer(model_name)
+        # Run a tiny inference to confirm the model is usable.
+        _ = model.encode(["hello world"])
+        click.secho(f"✓ Model '{model_name}' ready. Semantic search is now enabled.", fg="green")
+    except Exception as exc:
+        click.secho(f"✗ Failed to load model: {exc}", fg="red")
 @main.command("run-mcp", hidden=True)
 def run_mcp() -> None:
     """Run MCP server in stdio mode."""

{codespine-0.9.7 → codespine-0.9.9}/codespine/config.py RENAMED Viewed

@@ -1,5 +1,5 @@
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 @dataclass(frozen=True)
@@ -15,6 +15,10 @@ class Settings:
     num_shards: int = 4
     shards_dir: str = os.path.expanduser("~/.codespine/shards")
+    # Storage backend: "kuzu" (default, property-graph) or "duckdb" (relational).
+    # Override at runtime via CODESPINE_BACKEND env var before starting the process.
+    backend: str = field(default_factory=lambda: os.environ.get("CODESPINE_BACKEND", "kuzu"))
     pid_file: str = os.path.expanduser("~/.codespine.pid")
     log_file: str = os.path.expanduser("~/.codespine.log")
     embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")

codespine 0.9.7__tar.gz → 0.9.9__tar.gz

codespine 0.9.7__tar.gz → 0.9.9__tar.gz