PyPI - codespine - Versions diffs - 0.5.10__tar.gz → 0.6.0__tar.gz - Mend

codespine 0.5.10.tar.gz → 0.6.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

{codespine-0.5.10 → codespine-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.5.10
+Version: 0.6.0
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.5.10 → codespine-0.6.0}/codespine/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """CodeSpine package."""
 __all__ = ["__version__"]
-__version__ = "0.5.10"
+__version__ = "0.6.0"

{codespine-0.5.10 → codespine-0.6.0}/codespine/cli.py RENAMED Viewed

@@ -101,16 +101,17 @@ def main() -> None:
 @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
 @click.option(
     "--embed/--no-embed",
-    default=False,
+    default=True,
     show_default=True,
-    help="Generate vector embeddings (slow if sentence-transformers installed; enables semantic search).",
+    help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
 )
 @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
 def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool) -> None:
     """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
-    By default embeddings are skipped for speed. Pass --embed to generate
-    vector embeddings for semantic search (requires sentence-transformers).
+    Embeddings are generated by default. If sentence-transformers is installed
+    (pip install codespine[ml]), high-quality semantic vectors are used; otherwise
+    a fast hash-based fallback provides basic vector search.
     """
     if not allow_running and _is_running():
         click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
@@ -118,6 +119,17 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
     started = time.perf_counter()
     abs_path = os.path.abspath(path)
+    # Warn about hash fallback early so users know to install [ml]
+    if embed:
+        from codespine.search.vector import _load_model
+        if _load_model() is None:
+            click.secho(
+                "⚠  sentence-transformers not found — using hash-based embeddings.\n"
+                "   For better semantic search: pip install codespine[ml]\n",
+                fg="yellow",
+            )
     store = GraphStore(read_only=False)
     indexer = JavaIndexer(store)
@@ -309,9 +321,27 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         )
         _finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
     else:
+        # Run lightweight versions of flow tracing and dead code from the call
+        # graph already built — no community detection or coupling (those are
+        # genuinely expensive).  This gives partial results without --deep.
         _phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
-        _phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
-        _phase("Finding dead code...", "skipped (large repo; rerun with --deep)")
+        flow_label = "Detecting execution flows..."
+        _live_phase(flow_label, "running (lightweight)")
+        try:
+            flows = trace_execution_flows(store, max_depth=3)
+        except Exception:
+            flows = []
+        _finish_phase(flow_label, f"{len(flows)} flows (lightweight; rerun with --deep for full)")
+        dead_label = "Finding dead code..."
+        _live_phase(dead_label, "running (lightweight)")
+        try:
+            dead = detect_dead_code(store, limit=100)
+        except Exception:
+            dead = []
+        _finish_phase(dead_label, f"{_dead_result_count(dead)} candidates (lightweight; rerun with --deep for full)")
         _phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
     vector_count = store.query_records(
@@ -331,7 +361,12 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
     edges = int(edge_count[0]["count"]) if edge_count else 0
     elapsed = time.perf_counter() - started
-    embed_note = "" if embed else " (no embeddings; rerun with --embed for semantic search)"
+    if not embed:
+        embed_note = " (no embeddings; rerun with --embed for semantic search)"
+    elif _load_model() is None:
+        embed_note = " (hash embeddings; pip install codespine[ml] for better search)"
+    else:
+        embed_note = ""
     module_info = f"{len(modules_with_ids)} modules/projects, " if is_multi else ""
     click.echo()
     click.secho(
@@ -339,6 +374,17 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         fg="green",
     )
+    # Detect unresolved imports → hint about unindexed sibling projects
+    try:
+        unresolved = JavaIndexer.detect_unresolved_imports(store)
+        if unresolved:
+            click.echo()
+            click.secho("⚠  Unresolved imports — consider indexing these projects:", fg="yellow")
+            for pkg, samples in sorted(unresolved.items())[:8]:
+                click.echo(f"   {pkg}  (e.g. {samples[0]})")
+    except Exception:
+        pass  # best-effort
     # Publish a read replica so MCP and read-only CLI commands (search, stats…)
     # run against an isolated snapshot rather than competing with the write
     # process's buffer pool.  The MCP daemon detects the sentinel file and
@@ -579,7 +625,14 @@ def list_projects(as_json: bool) -> None:
 @main.command()
 @click.option("--json", "as_json", is_flag=True)
 def status(as_json: bool) -> None:
-    """Show service and database status."""
+    """Show service and database status.
+    Quick reference for MCP server management:
+      codespine start    – launch background MCP server
+      codespine stop     – stop background MCP server
+      codespine status   – this command
+      codespine mcp      – run MCP in foreground (stdio, for IDE integration)
+    """
     running = _is_running()
     pid = None
     if os.path.exists(SETTINGS.pid_file):
@@ -590,17 +643,35 @@ def status(as_json: bool) -> None:
             pid = None
     store = GraphStore(read_only=True)
     overlay = get_overlay_status(store)
+    # Check for stale PID file
+    stale_pid = pid is not None and not running
+    has_snapshot = os.path.exists(SETTINGS.db_snapshot_path)
     payload = {
         "running": running,
         "pid": pid,
+        "stale_pid": stale_pid,
         "pid_file": SETTINGS.pid_file,
         "db_path": SETTINGS.db_path,
         "db_size_bytes": _db_size_bytes(SETTINGS.db_path),
+        "read_replica": SETTINGS.db_snapshot_path if has_snapshot else None,
+        "read_replica_size_bytes": _db_size_bytes(SETTINGS.db_snapshot_path) if has_snapshot else 0,
         "log_file": SETTINGS.log_file,
         "overlay_dir": SETTINGS.overlay_dir,
         "overlay_projects": overlay,
     }
-    _echo_json(payload, as_json)
+    if as_json:
+        _echo_json(payload, True)
+    else:
+        _echo_json(payload, True)
+        if stale_pid:
+            click.secho(f"\n⚠  Stale PID file found (PID {pid} not running). Run 'codespine stop' to clean up.", fg="yellow")
+        if not running:
+            click.echo("\nTo start:  codespine start")
+            click.echo("For IDE:   codespine mcp  (stdio mode)")
+        else:
+            click.echo(f"\nMCP server running (PID {pid}). Stop with: codespine stop")
 @main.command("overlay-status")
@@ -727,6 +798,33 @@ def clear_index_cmd(allow_running: bool) -> None:
     click.secho(f"Cleared {len(projects)} project(s). Index is now empty.", fg="green")
+@main.command("force-reset")
+@click.option("--force", is_flag=True, help="Skip confirmation prompt.")
+def force_reset_cmd(force: bool) -> None:
+    """Emergency reset: delete ALL CodeSpine data files without touching the DB engine.
+    Use this when the buffer pool is exhausted and normal reset/clear commands
+    also fail with OOM.  This bypasses Kuzu entirely by removing data files
+    from disk, including the DB, read replica, overlay, meta cache, and
+    embedding cache.
+    After running this, restart the MCP server and re-index your projects.
+    """
+    if not force and not click.confirm(
+        "This will DELETE all CodeSpine data (DB, overlay, caches). Continue?"
+    ):
+        click.echo("Aborted.")
+        return
+    removed = GraphStore.force_delete_all_data()
+    if removed:
+        for p in removed:
+            click.echo(f"  removed: {p}")
+        click.secho(f"\nForce-reset complete. {len(removed)} path(s) removed.", fg="green")
+        click.echo("Next: restart MCP ('codespine stop && codespine start') and re-index.")
+    else:
+        click.secho("Nothing to remove — already clean.", fg="yellow")
 @main.command()
 def setup() -> None:
     """Print local setup checks and next steps."""
@@ -743,12 +841,21 @@ def setup() -> None:
             checks[mod] = True
         except Exception:
             checks[mod] = False
-    click.echo("Dependency check:")
+    click.echo("Core dependencies:")
     for mod, ok in checks.items():
         click.echo(f"  - {mod}: {'OK' if ok else 'MISSING'}")
-    click.echo("\\nRecommended:")
-    click.echo("  pip install -e .")
+    # Check optional ML dependencies
+    try:
+        from sentence_transformers import SentenceTransformer
+        click.echo("  - sentence-transformers: OK (semantic embeddings active)")
+    except ImportError:
+        click.secho("  - sentence-transformers: NOT INSTALLED (hash fallback; install for better search)", fg="yellow")
+    click.echo("\nRecommended setup:")
+    click.echo("  pip install -e '.[full]'                # core + ML + community detection")
+    click.echo("  pip install -e '.[ml]'                  # just ML embeddings")
+    click.echo("\nQuick start:")
     click.echo("  codespine analyse /path/to/java-project --full")
+    click.echo("  codespine start                         # launch MCP server")
     click.echo("  codespine search payment --json")

{codespine-0.5.10 → codespine-0.6.0}/codespine/config.py RENAMED Viewed

@@ -16,8 +16,8 @@ class Settings:
     rrf_k: int = 60
     semantic_candidate_pool: int = 2000
     write_batch_size: int = 500
-    index_file_batch_size: int = 64
-    edge_write_batch_size: int = 2000
+    index_file_batch_size: int = 20
+    edge_write_batch_size: int = 500
     default_coupling_months: int = 6
     default_min_coupling_strength: float = 0.3
     default_min_cochanges: int = 3

{codespine-0.5.10 → codespine-0.6.0}/codespine/db/store.py RENAMED Viewed

@@ -147,10 +147,13 @@ class GraphStore:
     def clear_project(self, project_id: str) -> None:
         file_recs = self.query_records("MATCH (f:File) WHERE f.project_id = $pid RETURN f.id as id", {"pid": project_id})
+        # Small batches (10 files per tx) prevent buffer pool OOM on large projects.
         for idx, rec in enumerate(file_recs, start=1):
-            self.clear_file(rec["id"])
-            if idx % 50 == 0:
+            with self.transaction():
+                self.clear_file(rec["id"])
+            if idx % 10 == 0:
                 self._recycle_conn()
+        self._recycle_conn()
         self.execute("MATCH (p:Project) WHERE p.id = $pid DETACH DELETE p", {"pid": project_id})
         self._recycle_conn()
@@ -502,6 +505,48 @@ class GraphStore:
         self.clear_flows()
         self.clear_coupling()
+    @staticmethod
+    def force_delete_all_data() -> list[str]:
+        """Delete all CodeSpine data files without touching the Kuzu engine.
+        This is the nuclear option for OOM recovery: when the buffer pool is
+        exhausted, normal DB writes (including reset_project / clear_project)
+        also fail.  This bypasses Kuzu entirely by removing the data files
+        from disk, allowing a fresh start.
+        Returns the list of paths that were removed.
+        """
+        removed: list[str] = []
+        for path in [
+            SETTINGS.db_path,
+            SETTINGS.db_snapshot_path,
+            SETTINGS.db_snapshot_path + ".updated",
+            SETTINGS.db_snapshot_path + ".tmp",
+            SETTINGS.embedding_cache_path,
+            SETTINGS.overlay_dir,
+            SETTINGS.index_meta_dir,
+        ]:
+            if not os.path.exists(path):
+                continue
+            try:
+                if os.path.isdir(path):
+                    shutil.rmtree(path, ignore_errors=True)
+                else:
+                    os.remove(path)
+                removed.append(path)
+            except OSError:
+                pass
+        # Also remove any stale WAL files next to the DB
+        for suffix in (".wal", ".lock"):
+            wal_path = SETTINGS.db_path + suffix
+            if os.path.exists(wal_path):
+                try:
+                    os.remove(wal_path)
+                    removed.append(wal_path)
+                except OSError:
+                    pass
+        return removed
     def rebuild_empty_db(self) -> None:
         self._recycle_conn()
         path = SETTINGS.db_path

{codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/engine.py RENAMED Viewed

@@ -253,6 +253,20 @@ class JavaIndexer:
                     for fid in delete_chunk:
                         self.store.clear_file(fid)
                 self.store._recycle_conn()
+        # Clean up stale project entries that point to the same path under a
+        # different ID (e.g. re-indexing "vision-server" directly after it was
+        # previously indexed as "vision::vision-server" from a workspace root).
+        try:
+            stale = self.store.query_records(
+                "MATCH (p:Project) WHERE p.path = $path AND p.id <> $pid RETURN p.id as id",
+                {"path": root_path, "pid": project_id},
+            )
+            for old in stale:
+                self.store.clear_project(old["id"])
+        except Exception:
+            pass  # best-effort cleanup
         self.store.upsert_project(project_id, root_path)
         for parse_chunk in self._chunked(parse_results, file_batch_size):
@@ -279,7 +293,7 @@ class JavaIndexer:
                         "hash": file_digest,
                     }
                 )
-                self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source))
+                self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source), imports=parsed.imports)
                 for cls in parsed.classes:
                     c_id = class_id(cls.fqcn, scope)
@@ -372,15 +386,31 @@ class JavaIndexer:
                         class_methods[c_id][method.signature] = m_id
                 files_indexed += 1
+            # Split writes into smaller transactions and recycle between each
+            # to prevent Kuzu WAL from exhausting the buffer pool on large
+            # incremental re-indexes (GH feedback: 1,604-file OOM).
+            if not full:
+                for clear_sub in self._chunked(file_rows, 10):
+                    with self.store.transaction():
+                        for row in clear_sub:
+                            self.store.clear_file(row["id"])
+                    self.store._recycle_conn()
             with self.store.transaction():
-                for row in file_rows:
-                    if not full:
-                        self.store.clear_file(row["id"])
                 self.store.upsert_files_batch(file_rows)
+            self.store._recycle_conn()
+            with self.store.transaction():
                 self.store.upsert_classes_batch(class_rows)
-                self.store.upsert_methods_batch(method_rows)
-                self.store.upsert_symbols_batch(symbol_rows)
             self.store._recycle_conn()
+            _METHOD_SUB_BATCH = 200
+            for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
+                with self.store.transaction():
+                    self.store.upsert_methods_batch(method_sub)
+                self.store._recycle_conn()
+            _SYMBOL_SUB_BATCH = 200
+            for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
+                with self.store.transaction():
+                    self.store.upsert_symbols_batch(symbol_sub)
+                self.store._recycle_conn()
         self._emit(progress, "resolve_calls_start")
         call_rows: list[dict] = []
@@ -697,7 +727,10 @@ class JavaIndexer:
             return
     @staticmethod
-    def _update_meta_cache_entry(meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int) -> None:
+    def _update_meta_cache_entry(
+        meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int,
+        imports: list[str] | None = None,
+    ) -> None:
         try:
             st = os.stat(file_path)
             mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
@@ -705,7 +738,10 @@ class JavaIndexer:
         except OSError:
             mtime_ns = -1
             size = size_hint
-        meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
+        entry: dict = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
+        if imports is not None:
+            entry["imports"] = imports
+        meta_cache[fid] = entry
     @staticmethod
     def _prune_meta_cache(meta_cache: dict[str, dict], current_file_ids: set[str]) -> None:
@@ -728,3 +764,76 @@ class JavaIndexer:
             return normalized.split("/src/", 1)[0]
         scope = os.path.dirname(normalized).strip()
         return scope or "."
+    @staticmethod
+    def detect_unresolved_imports(store) -> dict[str, list[str]]:
+        """Detect imports that reference packages not covered by any indexed project.
+        Returns a dict mapping unresolved base packages (e.g. "com.foo.bar")
+        to a list of sample import FQCNs.  Useful for suggesting which sibling
+        projects to index.
+        Only reports project-internal packages (not java.*, javax.*, org.apache.*
+        etc.).
+        """
+        # 1. Collect all indexed class FQCNs
+        try:
+            recs = store.query_records("MATCH (c:Class) RETURN c.fqcn as fqcn")
+        except Exception:
+            return {}
+        indexed_fqcns = {r["fqcn"] for r in recs if r.get("fqcn")}
+        indexed_packages = set()
+        for fqcn in indexed_fqcns:
+            parts = fqcn.rsplit(".", 1)
+            if len(parts) == 2:
+                indexed_packages.add(parts[0])
+        # 2. Collect all imports from overlay + any stored file data
+        # Parse imports from the parsed file metadata if available
+        meta_dir = SETTINGS.index_meta_dir
+        all_imports: set[str] = set()
+        if os.path.isdir(meta_dir):
+            for fname in os.listdir(meta_dir):
+                if not fname.endswith(".json"):
+                    continue
+                try:
+                    with open(os.path.join(meta_dir, fname), "r") as f:
+                        data = json.load(f)
+                    for fid, fmeta in data.items():
+                        for imp in fmeta.get("imports", []):
+                            all_imports.add(imp)
+                except Exception:
+                    pass
+        # 3. Also scan the DB for CALLS edges that reference unknown targets
+        # (lightweight — just check which classes were resolved vs not)
+        # 4. Filter: skip standard library / well-known third-party packages
+        _SKIP_PREFIXES = (
+            "java.", "javax.", "jakarta.",
+            "org.apache.", "org.springframework.", "org.hibernate.",
+            "org.slf4j.", "org.junit.", "org.mockito.",
+            "com.google.", "com.fasterxml.", "com.sun.",
+            "io.micrometer.", "io.netty.", "io.lettuce.",
+            "lombok.", "reactor.", "rx.",
+        )
+        unresolved: dict[str, list[str]] = {}
+        for imp in all_imports:
+            if any(imp.startswith(prefix) for prefix in _SKIP_PREFIXES):
+                continue
+            # Check if this import's class exists in the index
+            simple_name = imp.rsplit(".", 1)[-1]
+            pkg = imp.rsplit(".", 1)[0] if "." in imp else ""
+            if imp in indexed_fqcns:
+                continue
+            if pkg in indexed_packages:
+                continue  # same package, just not this specific class
+            # Group by top 3 package segments
+            parts = imp.split(".")
+            base_pkg = ".".join(parts[:min(3, len(parts))])
+            if base_pkg not in unresolved:
+                unresolved[base_pkg] = []
+            if len(unresolved[base_pkg]) < 5:
+                unresolved[base_pkg].append(imp)
+        return unresolved

{codespine-0.5.10 → codespine-0.6.0}/codespine/mcp/server.py RENAMED Viewed

@@ -59,12 +59,15 @@ def _git_available(path: str) -> bool:
 def _resolve_repo_path(store, project: str | None, repo_path_provider) -> str:
     """Resolve the filesystem path for a given project_id, falling back to cwd."""
     if project:
-        recs = store.query_records(
-            "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
-            {"pid": project},
-        )
-        if recs and recs[0].get("path"):
-            return recs[0]["path"]
+        try:
+            recs = store.query_records(
+                "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
+                {"pid": project},
+            )
+            if recs and recs[0].get("path"):
+                return recs[0]["path"]
+        except Exception:
+            pass
     return repo_path_provider()
@@ -305,6 +308,20 @@ def build_mcp_server(store, repo_path_provider):
                 "RECOMMENDED: start watch mode during active development."
             )
+        # Detect unresolved imports → hint about unindexed sibling projects
+        unresolved_imports: dict[str, list[str]] = {}
+        try:
+            from codespine.indexer.engine import JavaIndexer as _JI
+            unresolved_imports = _JI.detect_unresolved_imports(store)
+            if unresolved_imports:
+                pkgs = list(unresolved_imports.keys())[:5]
+                notes["unresolved_imports"] = (
+                    f"Imports from unindexed packages detected: {', '.join(pkgs)}. "
+                    "Consider indexing these projects for complete cross-project tracing."
+                )
+        except Exception:
+            pass
         return {
             "available": True,
             "indexed_projects": projects,
@@ -333,6 +350,7 @@ def build_mcp_server(store, repo_path_provider):
                 "get_overlay_status": True,
                 "promote_overlay": True,
                 "clear_overlay": True,
+                "force_reset_index": True,
             },
             "background_jobs": {
                 "watch_running": watch_running,
@@ -787,20 +805,36 @@ def build_mcp_server(store, repo_path_provider):
         Recent git commits for the project (or a specific file).
         Returns available=false if the directory is not a git repository.
         Use project=<project_id> to target a specific indexed module's repo.
+        TIP: Always pass project= to ensure the correct repo is used.
         """
         repo = _resolve_repo_path(store, project, repo_path_provider)
+        if not os.path.isdir(repo):
+            return {
+                "available": False,
+                "note": f"Path does not exist: {repo}. Pass project=<project_id> to resolve the repo from the index.",
+            }
         if not _git_available(repo):
-            return {"available": False, "note": "Not a git repository (or git not installed)."}
+            return {
+                "available": False,
+                "note": (
+                    f"Not a git repository at {repo}. "
+                    "Pass project=<project_id> so the tool resolves the correct repo root. "
+                    "Use list_projects() to see available IDs."
+                ),
+            }
         cmd = ["git", "log", f"--max-count={limit}", "--oneline", "--no-decorate"]
         if file_path:
             cmd += ["--", file_path]
         r = subprocess.run(cmd, cwd=repo, capture_output=True, text=True, timeout=30)
         if r.returncode != 0:
-            return {"available": False, "error": r.stderr.strip()}
+            return {"available": False, "error": r.stderr.strip(), "repo_path": repo}
+        log_lines = r.stdout.strip().splitlines()
         return {
             "available": True,
             "project": project or repo,
-            "log": r.stdout.strip().splitlines(),
+            "repo_path": repo,
+            "log": log_lines,
+            "note": f"{len(log_lines)} commit(s)" + (" (no commits yet)" if not log_lines else ""),
         }
     @mcp.tool()
@@ -809,26 +843,42 @@ def build_mcp_server(store, repo_path_provider):
         Show git diff (working tree vs ref, or between two refs separated by '...').
         Output is truncated to 200 lines.
         Returns available=false if the directory is not a git repository.
+        TIP: Always pass project= to ensure the correct repo is used.
         """
         repo = _resolve_repo_path(store, project, repo_path_provider)
+        if not os.path.isdir(repo):
+            return {
+                "available": False,
+                "note": f"Path does not exist: {repo}. Pass project=<project_id> to resolve the repo from the index.",
+            }
         if not _git_available(repo):
-            return {"available": False, "note": "Not a git repository (or git not installed)."}
+            return {
+                "available": False,
+                "note": (
+                    f"Not a git repository at {repo}. "
+                    "Pass project=<project_id> so the tool resolves the correct repo root. "
+                    "Use list_projects() to see available IDs."
+                ),
+            }
         cmd = ["git", "diff", ref]
         if file_path:
             cmd += ["--", file_path]
         r = subprocess.run(cmd, cwd=repo, capture_output=True, text=True, timeout=30)
         if r.returncode != 0:
-            return {"available": False, "error": r.stderr.strip()}
+            return {"available": False, "error": r.stderr.strip(), "repo_path": repo}
         lines = r.stdout.splitlines()
         truncated = False
         if len(lines) > 200:
             lines = lines[:200]
             truncated = True
+        diff_text = "\n".join(lines)
         return {
             "available": True,
             "project": project or repo,
-            "diff": "\n".join(lines),
+            "repo_path": repo,
+            "diff": diff_text,
             "truncated": truncated,
+            "note": f"{len(lines)} line(s)" + (" — no changes" if not diff_text.strip() else ""),
         }
     @mcp.tool()
@@ -1240,6 +1290,38 @@ def build_mcp_server(store, repo_path_provider):
             ),
         }
+    @mcp.tool()
+    def force_reset_index():
+        """
+        Emergency reset: delete ALL CodeSpine data files without touching the
+        DB engine.
+        Use this when the buffer pool is exhausted and normal reset/clear
+        commands also fail with OOM errors.  This bypasses Kuzu entirely by
+        removing all data files from disk.
+        After calling this, restart the MCP server and re-index all projects
+        with analyse_project().
+        This is the nuclear option — only use when reset_project() and
+        reset_index() fail with buffer pool errors.
+        """
+        from codespine.db.store import GraphStore as _GS
+        removed = _GS.force_delete_all_data()
+        return {
+            "available": True,
+            "removed_paths": removed,
+            "removed_count": len(removed),
+            "note": (
+                f"Force-reset complete. {len(removed)} path(s) removed. "
+                "Restart the MCP server (codespine stop && codespine start) "
+                "and re-index projects with analyse_project()."
+                if removed else
+                "Nothing to remove — already clean."
+            ),
+        }
     # ------------------------------------------------------------------
     # Neighborhood exploration
     # ------------------------------------------------------------------
@@ -1395,6 +1477,10 @@ def build_mcp_server(store, repo_path_provider):
         Use this after editing a file to immediately refresh the graph without
         waiting for watch mode or running a full analysis.
+        The file is parsed and its symbols are stored in the overlay (just like
+        watch mode), so the updated data is immediately visible in search and
+        find_symbol results.
         Parameters:
           file_path – Absolute path to the .java file.
           project   – Optional project_id. If omitted, the tool infers the
@@ -1408,9 +1494,12 @@ def build_mcp_server(store, repo_path_provider):
         # Resolve project from indexed projects if not given
         if not project:
-            projects = store.query_records(
-                "MATCH (p:Project) RETURN p.id as id, p.path as path"
-            )
+            try:
+                projects = store.query_records(
+                    "MATCH (p:Project) RETURN p.id as id, p.path as path"
+                )
+            except Exception as exc:
+                return {"available": False, "note": f"DB read failed: {exc}"}
             for p in projects:
                 if abs_fp.startswith(p["path"] + _os.sep):
                     project = p["id"]
@@ -1425,58 +1514,64 @@ def build_mcp_server(store, repo_path_provider):
                 }
         # Find the project path to use as root for indexing
-        proj_recs = store.query_records(
-            "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
-            {"pid": project},
-        )
+        try:
+            proj_recs = store.query_records(
+                "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
+                {"pid": project},
+            )
+        except Exception as exc:
+            return {"available": False, "note": f"DB read failed: {exc}"}
         if not proj_recs:
             return {"available": False, "note": f"Project '{project}' not found in index."}
         proj_path = proj_recs[0]["path"]
-        # Run incremental index via subprocess to avoid read-only DB constraint.
-        # Use Popen + communicate() with a timeout so that a hang never crashes
-        # the MCP server process — the subprocess is killed gracefully instead.
-        cmd = [
-            sys.executable, "-m", "codespine.cli",
-            "analyse", proj_path,
-            "--incremental", "--no-embed", "--allow-running",
-        ]
+        # Use overlay-based single-file update (same mechanism as watch mode).
+        # This avoids spawning a subprocess and contending with the write DB.
+        from codespine.watch.watcher import _update_overlay_for_files
         t0 = time.time()
         try:
-            proc = subprocess.Popen(
-                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
-            )
-            stdout, stderr = proc.communicate(timeout=30)
+            result = _update_overlay_for_files(store, proj_path, project, [abs_fp])
             elapsed = round(time.time() - t0, 2)
-        except subprocess.TimeoutExpired:
-            proc.kill()
-            proc.communicate()  # reap zombie
-            elapsed = round(time.time() - t0, 2)
-            return {
-                "available": False,
-                "note": f"Re-index timed out after {elapsed}s. The project may be too large for single-file re-index. Use analyse_project() instead.",
-            }
         except Exception as exc:
             elapsed = round(time.time() - t0, 2)
-            return {
-                "available": False,
-                "note": f"Re-index error: {exc}",
-            }
-        if proc.returncode != 0:
-            return {
-                "available": False,
-                "note": f"Re-index failed (code {proc.returncode})",
-                "error": (stderr or stdout or "").strip()[:500],
-            }
+            _LOGGER.warning("reindex_file failed: %s", exc)
+            # Fall back to subprocess approach
+            cmd = [
+                sys.executable, "-m", "codespine.cli",
+                "analyse", proj_path,
+                "--incremental", "--no-embed", "--allow-running",
+            ]
+            try:
+                proc = subprocess.Popen(
+                    cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
+                )
+                stdout, stderr = proc.communicate(timeout=60)
+                elapsed = round(time.time() - t0, 2)
+                if proc.returncode != 0:
+                    return {
+                        "available": False,
+                        "note": f"Re-index failed (code {proc.returncode})",
+                        "error": (stderr or stdout or "").strip()[:500],
+                    }
+                return {
+                    "available": True,
+                    "file": abs_fp,
+                    "project": project,
+                    "elapsed_s": elapsed,
+                    "note": f"Overlay update failed; fell back to full incremental re-index in {elapsed}s.",
+                }
+            except Exception as fallback_exc:
+                return {"available": False, "note": f"Re-index error: overlay={exc}, subprocess={fallback_exc}"}
         return {
             "available": True,
             "file": abs_fp,
             "project": project,
             "elapsed_s": elapsed,
-            "note": f"Re-indexed project {project} incrementally in {elapsed}s.",
+            "changed": result.get("changed", 0),
+            "note": f"Re-indexed {abs_fp} via overlay in {elapsed}s.",
         }
     # ------------------------------------------------------------------

{codespine-0.5.10 → codespine-0.6.0}/codespine/watch/watcher.py RENAMED Viewed

@@ -1,8 +1,10 @@
 from __future__ import annotations
+import logging
 import os
 import threading
 import time
+import traceback
 from codespine.analysis.community import detect_communities
 from codespine.analysis.coupling import compute_coupling
@@ -13,6 +15,8 @@ from codespine.indexer.engine import JavaIndexer
 from codespine.overlay.git_state import current_head, git_repo_root
 from codespine.overlay.store import OverlayStore, build_overlay_file_entry
+LOGGER = logging.getLogger(__name__)
 def _project_modules(root_path: str) -> tuple[dict[str, str], list[str], bool]:
     abs_path = os.path.abspath(root_path)
@@ -39,15 +43,22 @@ def get_overlay_status(store, project: str | None = None) -> list[dict]:
     statuses = overlay_store.status(project)
     out: list[dict] = []
     for item in statuses:
-        metadata = store.get_project_metadata(item["project_id"]) or {}
+        try:
+            metadata = store.get_project_metadata(item["project_id"]) or {}
+        except Exception:
+            metadata = {}
+        # The overlay JSON on disk is the source of truth; the DB flag
+        # may be stale if the watch process couldn't write to the DB.
+        overlay_present = bool(item.get("overlay_present"))
+        db_dirty = bool(metadata.get("overlay_dirty", False))
         out.append(
             {
                 **item,
                 "indexed_commit": metadata.get("indexed_commit", ""),
-                "overlay_dirty": bool(metadata.get("overlay_dirty", False)),
+                "overlay_dirty": overlay_present or db_dirty,
                 "indexed_at": metadata.get("indexed_at", ""),
                 "promotion_pending": bool(
-                    item.get("overlay_present")
+                    overlay_present
                     and item.get("current_head")
                     and metadata.get("indexed_commit")
                     and item.get("current_head") != metadata.get("indexed_commit")
@@ -107,68 +118,98 @@ def promote_overlay(store, project: str | None = None, require_head_change: bool
 def _update_overlay_for_files(store, project_path: str, project_id: str, file_paths: list[str]) -> dict:
     overlay_store: OverlayStore = store.overlay_store
     indexer = JavaIndexer(store)
-    metadata = store.get_project_metadata(project_id) or {}
     repo_root = git_repo_root(project_path)
-    indexed_commit = str(metadata.get("indexed_commit") or "")
     head = current_head(project_path)
-    existing_doc = overlay_store.load_project(project_id)
-    base_method_catalog = indexer._existing_method_catalog(project_id)
-    base_class_catalog = indexer._existing_class_catalog(project_id)
-    base_class_ids = indexer._existing_class_ids_by_fqcn(project_id)
-    base_class_methods = indexer._existing_class_methods(project_id)
-    embed = store.project_has_embeddings(project_id)
+    # DB reads can fail if the write DB is busy; fall back to empty catalogs
+    # so the overlay still captures the file changes from tree-sitter alone.
+    try:
+        metadata = store.get_project_metadata(project_id) or {}
+    except Exception as exc:
+        LOGGER.warning("watch: DB read failed for project metadata (%s), using empty", exc)
+        metadata = {}
+    indexed_commit = str(metadata.get("indexed_commit") or "")
+    try:
+        base_method_catalog = indexer._existing_method_catalog(project_id)
+        base_class_catalog = indexer._existing_class_catalog(project_id)
+        base_class_ids = indexer._existing_class_ids_by_fqcn(project_id)
+        base_class_methods = indexer._existing_class_methods(project_id)
+    except Exception as exc:
+        LOGGER.warning("watch: DB read failed for catalogs (%s), using empty", exc)
+        base_method_catalog = {}
+        base_class_catalog = {}
+        base_class_ids = {}
+        base_class_methods = {}
-    changed = deleted = 0
+    try:
+        embed = store.project_has_embeddings(project_id)
+    except Exception:
+        embed = False
+    existing_doc = overlay_store.load_project(project_id)
+    changed = deleted = errors = 0
     for file_path in sorted(set(os.path.abspath(p) for p in file_paths)):
         if not file_path.endswith(".java"):
             continue
-        if os.path.exists(file_path):
-            with open(file_path, "rb") as fh:
-                source = fh.read()
-            entry = build_overlay_file_entry(
-                store=store,
-                project_id=project_id,
-                project_path=project_path,
-                file_path=file_path,
-                source=source,
-                embed=embed,
-                base_method_catalog=base_method_catalog,
-                base_class_catalog=base_class_catalog,
-                base_class_ids_by_fqcn=base_class_ids,
-                base_class_methods=base_class_methods,
-                existing_overlay_doc=existing_doc,
-            )
-            overlay_store.upsert_file(
-                project_id=project_id,
-                project_path=project_path,
-                repo_root=repo_root,
-                base_commit=indexed_commit,
-                current_head=head,
-                file_path=file_path,
-                entry=entry,
-            )
-            existing_doc = overlay_store.load_project(project_id)
-            changed += 1
-        else:
-            overlay_store.mark_deleted(
-                project_id=project_id,
-                project_path=project_path,
-                repo_root=repo_root,
-                base_commit=indexed_commit,
-                current_head=head,
-                file_path=file_path,
-            )
-            existing_doc = overlay_store.load_project(project_id)
-            deleted += 1
+        try:
+            if os.path.exists(file_path):
+                with open(file_path, "rb") as fh:
+                    source = fh.read()
+                entry = build_overlay_file_entry(
+                    store=store,
+                    project_id=project_id,
+                    project_path=project_path,
+                    file_path=file_path,
+                    source=source,
+                    embed=embed,
+                    base_method_catalog=base_method_catalog,
+                    base_class_catalog=base_class_catalog,
+                    base_class_ids_by_fqcn=base_class_ids,
+                    base_class_methods=base_class_methods,
+                    existing_overlay_doc=existing_doc,
+                )
+                overlay_store.upsert_file(
+                    project_id=project_id,
+                    project_path=project_path,
+                    repo_root=repo_root,
+                    base_commit=indexed_commit,
+                    current_head=head,
+                    file_path=file_path,
+                    entry=entry,
+                )
+                existing_doc = overlay_store.load_project(project_id)
+                changed += 1
+            else:
+                overlay_store.mark_deleted(
+                    project_id=project_id,
+                    project_path=project_path,
+                    repo_root=repo_root,
+                    base_commit=indexed_commit,
+                    current_head=head,
+                    file_path=file_path,
+                )
+                existing_doc = overlay_store.load_project(project_id)
+                deleted += 1
+        except Exception as exc:
+            LOGGER.warning("watch: failed to process %s: %s", file_path, exc)
+            errors += 1
     if changed or deleted:
-        if metadata:
-            store.set_project_overlay_dirty(project_id, True)
-        else:
-            store.upsert_project(project_id, project_path)
-            store.set_project_indexed_commit(project_id, indexed_commit)
-            store.set_project_overlay_dirty(project_id, True)
-    return {"project_id": project_id, "changed": changed, "deleted": deleted}
+        # Try to mark dirty in the DB; if the DB is busy (write contention),
+        # the overlay JSON on disk is still correct and will be picked up on
+        # next read.  Don't let a DB write failure discard overlay work.
+        try:
+            if metadata:
+                store.set_project_overlay_dirty(project_id, True)
+            else:
+                store.upsert_project(project_id, project_path)
+                store.set_project_indexed_commit(project_id, indexed_commit)
+                store.set_project_overlay_dirty(project_id, True)
+        except Exception as exc:
+            LOGGER.warning("watch: DB write failed for overlay_dirty flag (%s); overlay is still on disk", exc)
+    return {"project_id": project_id, "changed": changed, "deleted": deleted, "errors": errors}
 def run_watch_mode(
@@ -227,11 +268,17 @@ def run_watch_mode(
             for module_path, files in sorted(grouped.items()):
                 project_id = module_map.get(module_path, os.path.basename(module_path))
                 start = time.time()
-                result = _update_overlay_for_files(store, module_path, project_id, files)
+                try:
+                    result = _update_overlay_for_files(store, module_path, project_id, files)
+                except Exception as exc:
+                    LOGGER.error("watch: overlay update failed for %s: %s\n%s", project_id, exc, traceback.format_exc())
+                    print(f"[{time.strftime('%H:%M:%S')}] {project_id}: ERROR updating overlay — {exc}")
+                    continue
                 elapsed = time.time() - start
+                err_note = f", {result.get('errors', 0)} errors" if result.get("errors") else ""
                 print(
                     f"[{time.strftime('%H:%M:%S')}] {project_id}: overlay updated "
-                    f"({result['changed']} changed, {result['deleted']} deleted) in {elapsed:.1f}s"
+                    f"({result['changed']} changed, {result['deleted']} deleted{err_note}) in {elapsed:.1f}s"
                 )
             if promote_on_commit:

{codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.5.10
+Version: 0.6.0
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.5.10 → codespine-0.6.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "codespine"
-version = "0.5.10"
+version = "0.6.0"
 description = "Local Java code intelligence indexer backed by a graph database"
 readme = "README.md"
 requires-python = ">=3.10"

{codespine-0.5.10 → codespine-0.6.0}/LICENSE RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/README.md RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/community.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/context.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/coupling.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/crossmodule.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/deadcode.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/flow.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/impact.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/db/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/db/schema.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/diff/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/diff/branch_diff.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/call_resolver.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/java_parser.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/symbol_builder.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/mcp/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/noise/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/noise/blocklist.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/git_state.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/merge.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/store.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/search/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/search/bm25.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/search/fuzzy.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/search/hybrid.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/search/rrf.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/search/vector.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine/watch/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/entry_points.txt RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/requires.txt RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/top_level.txt RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/gindex.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/setup.cfg RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_branch_diff_normalize.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_call_resolver.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_community_detection.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_deadcode.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_index_and_hybrid.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_java_parser.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_multimodule_index.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_overlay.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_search_ranking.py RENAMED Viewed

File without changes

{codespine-0.5.10 → codespine-0.6.0}/tests/test_store_recovery.py RENAMED Viewed

File without changes

codespine 0.5.10__tar.gz → 0.6.0__tar.gz

codespine 0.5.10__tar.gz → 0.6.0__tar.gz