PyPI - codespine - Versions diffs - 0.9.5__tar.gz → 0.9.7__tar.gz - Mend

codespine 0.9.5__tar.gz → 0.9.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

{codespine-0.9.5 → codespine-0.9.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.9.5
+Version: 0.9.7
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.9.5 → codespine-0.9.7}/codespine/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """CodeSpine package."""
 __all__ = ["__version__"]
-__version__ = "0.9.5"
+__version__ = "0.9.7"

{codespine-0.9.5 → codespine-0.9.7}/codespine/cli.py RENAMED Viewed

@@ -6,7 +6,10 @@ import os
 import signal
 import subprocess
 import sys
+import threading
 import time
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
 import click
 import psutil
@@ -20,6 +23,7 @@ from codespine.analysis.flow import trace_execution_flows
 from codespine.analysis.impact import analyze_impact
 from codespine.config import SETTINGS
 from codespine.db.store import GraphStore
+from codespine.sharding import ShardedGraphStore, ShardRouter
 from codespine.diff.branch_diff import compare_branches
 from codespine.indexer.engine import JavaIndexer
 from codespine.mcp.server import build_mcp_server
@@ -90,6 +94,197 @@ def _spinner_char() -> str:
     return "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"[int(time.perf_counter() * 8) % 10]
+def _index_shard_group(
+    shard_idx: int,
+    modules: list[tuple[str, str]],
+    sg,
+    full: bool,
+    embed: bool,
+    output_lock: threading.Lock,
+    parallel: bool,
+) -> tuple[int, list, int]:
+    """Index one group of modules that share a shard.
+    Modules within the group are always indexed sequentially (same KùzuDB).
+    Multiple groups can run concurrently in different threads when they own
+    different shards.
+    Returns (shard_idx, all_results, total_files_found).
+    """
+    results = []
+    total_files = 0
+    def _locked_echo(*args, **kwargs) -> None:
+        """Thread-safe click.echo."""
+        with output_lock:
+            click.echo(*args, **kwargs)
+    def _locked_secho(*args, **kwargs) -> None:
+        with output_lock:
+            click.secho(*args, **kwargs)
+    prefix = f"[S{shard_idx}] " if parallel else ""
+    for mod_path, project_id in modules:
+        # Per-module progress state (local — no shared mutation).
+        parse_state: dict = {"shown": False, "indexed": 0, "total": 0,
+                              "last_ts": 0.0, "printed_zero": False}
+        call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
+                             "started_at": 0.0}
+        def _progress(event: str, payload: dict) -> None:
+            now = time.perf_counter()
+            if event == "scan_done":
+                with output_lock:
+                    _phase(f"{prefix}Walking files...", f"{int(payload.get('files_found', 0))} files found")
+                return
+            if event == "plan_done":
+                to_index = int(payload.get("files_to_index", 0))
+                deleted = int(payload.get("deleted_files", 0))
+                mode = str(payload.get("mode", "incremental"))
+                parse_state["total"] = to_index
+                with output_lock:
+                    _phase(f"{prefix}Index mode...", f"{mode} ({to_index} files, {deleted} deleted)")
+                if to_index == 0:
+                    with output_lock:
+                        _phase(f"{prefix}Parsing code...", "0/0")
+                    parse_state["printed_zero"] = True
+                return
+            if event == "parse_progress":
+                indexed = int(payload.get("indexed", 0))
+                total = int(payload.get("total", 0))
+                parse_state["indexed"] = indexed
+                parse_state["total"] = total
+                if total == 0:
+                    return
+                if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
+                    if not parallel:
+                        # In-place progress bar only makes sense in serial mode.
+                        click.echo(
+                            f"\r{prefix}Parsing code...   {_bar(indexed, total)} {indexed}/{total}  ",
+                            nl=False,
+                        )
+                    else:
+                        with output_lock:
+                            click.echo(
+                                f"\r{prefix}Parsing {indexed}/{total}  ",
+                                nl=False,
+                            )
+                    parse_state["shown"] = True
+                    parse_state["last_ts"] = now
+                return
+            if event in ("resolve_calls_start",):
+                if parse_state["shown"]:
+                    with output_lock:
+                        click.echo()
+                    parse_state["shown"] = False
+                call_state["started_at"] = now
+                with output_lock:
+                    _phase(f"{prefix}Tracing calls...", "starting...")
+                return
+            if event == "resolve_calls_progress":
+                call_state["count"] = int(payload.get("calls_resolved", 0))
+                if (now - call_state["last_ts"]) >= 0.25:
+                    elapsed_s = now - call_state["started_at"]
+                    if not parallel:
+                        click.echo(
+                            f"\r{_spinner_char()} {prefix}Tracing calls...   "
+                            f"{call_state['count']:>6} resolved  {elapsed_s:.1f}s  ",
+                            nl=False,
+                        )
+                    else:
+                        with output_lock:
+                            click.echo(
+                                f"\r{prefix}Calls: {call_state['count']} ({elapsed_s:.0f}s)  ",
+                                nl=False,
+                            )
+                    call_state["shown"] = True
+                    call_state["last_ts"] = now
+                return
+            if event == "resolve_calls_done":
+                if call_state["shown"]:
+                    with output_lock:
+                        click.echo()
+                call_state["shown"] = False
+                elapsed_s = (now - call_state["started_at"]) if call_state["started_at"] else 0.0
+                n = int(payload.get("calls_resolved", 0))
+                with output_lock:
+                    _phase(f"{prefix}Tracing calls...", f"{n} calls resolved  ({elapsed_s:.1f}s)")
+                return
+            if event == "resolve_types_start":
+                with output_lock:
+                    _phase(f"{prefix}Analyzing types...", "running")
+                return
+            if event == "resolve_types_done":
+                n = int(payload.get("type_relationships", 0))
+                with output_lock:
+                    _phase(f"{prefix}Analyzing types...", f"{n} type relationships")
+                return
+        shard_store = sg.shard(project_id)
+        indexer = JavaIndexer(shard_store)
+        result = indexer.index_project(
+            mod_path, full=full, progress=_progress, project_id=project_id, embed=embed
+        )
+        results.append(result)
+        total_files += result.files_found
+        # Flush any dangling progress line.
+        if parse_state["shown"]:
+            with output_lock:
+                click.echo()
+    return shard_idx, results, total_files
+def _show_shard_topology(as_json: bool) -> None:
+    """Display the current shard routing topology and imbalance metrics."""
+    router = ShardRouter()
+    sg = ShardedGraphStore(read_only=True)
+    topology = sg.describe()
+    # Gather project → shard mapping from all shards.
+    shard_project_counts: dict[int, list[str]] = {i: [] for i in range(router.num_shards)}
+    for p in sg.list_project_metadata():
+        pid = p.get("id", "")
+        idx = router.shard_for(pid)
+        shard_project_counts[idx].append(pid)
+    counts = [len(v) for v in shard_project_counts.values()]
+    total = sum(counts)
+    median = sorted(counts)[len(counts) // 2] if counts else 0
+    max_count = max(counts) if counts else 0
+    imbalance = (max_count / median) if median else 1.0
+    if as_json:
+        _echo_json({
+            "topology": topology,
+            "project_distribution": {str(k): v for k, v in shard_project_counts.items()},
+            "imbalance_ratio": round(imbalance, 2),
+        }, as_json=True)
+        return
+    click.secho(f"Shard topology ({router.num_shards} shards)", fg="cyan")
+    click.echo(f"  Directory : {router.shards_dir}")
+    click.echo(f"  Ring size : {len(router._ring)} virtual nodes ({router.num_shards} × {150})")
+    click.echo(f"  Projects  : {total} total, imbalance ratio {imbalance:.2f}x")
+    click.echo()
+    header = f"{'Shard':>6}  {'Projects':>9}  {'DB exists':>10}  Path"
+    click.secho(header, fg="cyan")
+    click.echo("-" * 60)
+    for i, info in enumerate(topology.get("shards", [])):
+        plist = shard_project_counts.get(i, [])
+        exists_str = "yes" if info.get("exists") else "no"
+        click.echo(f"{i:>6}  {len(plist):>9}  {exists_str:>10}  {info.get('db_path', '')}")
+        for pid in plist:
+            click.echo(f"{'':>6}  {'':>9}  {'':>10}    {pid}")
+    if imbalance > 2.0:
+        click.secho(
+            f"\nWarning: imbalance ratio {imbalance:.1f}x. Consider re-indexing to redistribute projects.",
+            fg="yellow",
+        )
 @click.group()
 def main() -> None:
     """CodeSpine CLI."""
@@ -130,8 +325,12 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
                 fg="yellow",
             )
-    store = GraphStore(read_only=False)
-    indexer = JavaIndexer(store)
+    # ShardedGraphStore routes each project to its dedicated DB shard.
+    # For single-project analysis this is transparent — shard() always
+    # returns a GraphStore pointing to the correct shard path.
+    sg = ShardedGraphStore(read_only=False)
+    # The indexer is initialised per-module below with the right shard store.
+    # We keep a single ShardedGraphStore to fan-out cross-module linking later.
     # --- Workspace → project → module detection ---
     # Level 1: workspace (e.g. ~/IdeaProjects/) may contain independent projects.
@@ -164,96 +363,69 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
     root_basename = os.path.basename(abs_path)
-    # Shared progress state (reset per module)
-    parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0, "printed_zero": False}
-    call_state = {"shown": False, "count": 0, "last_ts": 0.0, "started_at": 0.0}
-    def _reset_state() -> None:
-        for k in list(parse_state):
-            parse_state[k] = False if isinstance(parse_state[k], bool) else (0.0 if isinstance(parse_state[k], float) else 0)
-        parse_state["last_ts"] = 0.0
-    def _progress(event: str, payload: dict) -> None:
-        now = time.perf_counter()
-        if event == "scan_done":
-            _phase("Walking files...", f"{int(payload.get('files_found', 0))} files found")
-            return
-        if event == "plan_done":
-            to_index = int(payload.get("files_to_index", 0))
-            deleted = int(payload.get("deleted_files", 0))
-            mode = str(payload.get("mode", "incremental"))
-            parse_state["total"] = to_index
-            _phase("Index mode...", f"{mode} ({to_index} files to index, {deleted} deleted)")
-            if to_index == 0:
-                _phase("Parsing code...", "0/0")
-                parse_state["printed_zero"] = True
-            return
-        if event == "parse_progress":
-            indexed = int(payload.get("indexed", 0))
-            total = int(payload.get("total", 0))
-            parse_state["indexed"] = indexed
-            parse_state["total"] = total
-            if total == 0:
-                return
-            if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
-                click.echo(f"\rParsing code...   {_bar(indexed, total)} {indexed}/{total}  ", nl=False)
-                parse_state["shown"] = True
-                parse_state["last_ts"] = now
-            return
-        if event == "resolve_calls_start" and parse_state["shown"]:
-            click.echo()
-            parse_state["shown"] = False
-            call_state["started_at"] = now
-            _phase("Tracing calls...", "starting...")
-            return
-        if event == "resolve_calls_start":
-            call_state["started_at"] = now
-            _phase("Tracing calls...", "starting...")
-            return
-        if event == "resolve_calls_progress":
-            call_state["count"] = int(payload.get("calls_resolved", 0))
-            if (now - call_state["last_ts"]) >= 0.25:
-                elapsed_s = now - call_state["started_at"]
-                click.echo(
-                    f"\r{_spinner_char()} Tracing calls...   {call_state['count']:>6} resolved  {elapsed_s:.1f}s  ",
-                    nl=False,
-                )
-                call_state["shown"] = True
-                call_state["last_ts"] = now
-            return
-        if event == "resolve_calls_done":
-            if call_state["shown"]:
-                click.echo()
-            call_state["shown"] = False
-            elapsed_s = (now - call_state["started_at"]) if call_state["started_at"] else 0.0
-            _phase("Tracing calls...", f"{int(payload.get('calls_resolved', 0))} calls resolved  ({elapsed_s:.1f}s)")
-            return
-        if event == "resolve_types_start":
-            _phase("Analyzing types...", "running")
-            return
-        if event == "resolve_types_done":
-            _phase("Analyzing types...", f"{int(payload.get('type_relationships', 0))} type relationships")
-            return
-    # --- Index each module ---
+    # ── Group modules by target shard ─────────────────────────────────
+    # Modules that hash to different shards own separate KùzuDBs and can
+    # be indexed in parallel.  Modules in the same shard (same project
+    # root for multi-module projects) are always indexed sequentially.
+    shard_groups: dict[int, list[tuple[str, str]]] = defaultdict(list)
+    for mod_path, pid in modules_with_ids:
+        shard_groups[sg.router.shard_for(pid)].append((mod_path, pid))
     is_multi = len(modules_with_ids) > 1
+    parallel_mode = len(shard_groups) > 1  # ≥2 shards → true parallelism
+    output_lock = threading.Lock()
+    if parallel_mode:
+        click.secho(
+            f"Parallel mode: {len(shard_groups)} shards will be indexed concurrently.",
+            fg="cyan",
+        )
+    # Print which shard each module lands on (multi-module only).
+    if is_multi:
+        for s_idx, group in sorted(shard_groups.items()):
+            for _, pid in group:
+                click.secho(f"  {pid:<40} → shard {s_idx}", fg="cyan")
+    # ── Dispatch to shards ────────────────────────────────────────────
     total_files_found = 0
+    all_results: list = []
     last_result = None
-    for idx, (module_path, project_id) in enumerate(modules_with_ids):
-        if is_multi:
-            click.echo()
-            click.secho(f"[{idx + 1}/{len(modules_with_ids)}] Indexing: {project_id}", fg="cyan")
-        _reset_state()
-        last_result = indexer.index_project(
-            module_path, full=full, progress=_progress, project_id=project_id, embed=embed
+    if parallel_mode:
+        max_workers = min(len(shard_groups), 4)
+        click.echo()
+        futures_map = {}
+        with ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="codespine-shard") as ex:
+            for s_idx, group in shard_groups.items():
+                f = ex.submit(
+                    _index_shard_group,
+                    s_idx, group, sg, full, embed, output_lock, True,
+                )
+                futures_map[f] = s_idx
+            for future in as_completed(futures_map):
+                s_idx = futures_map[future]
+                try:
+                    ret_idx, results, n_files = future.result()
+                    all_results.extend(results)
+                    total_files_found += n_files
+                    if results:
+                        last_result = results[-1]
+                    with output_lock:
+                        click.secho(f"  Shard {ret_idx} done ({n_files} files)", fg="green")
+                except Exception as exc:  # noqa: BLE001
+                    with output_lock:
+                        click.secho(f"  Shard {s_idx} FAILED: {exc}", fg="red")
+    else:
+        # Serial path — single shard (or single module).  Full progress UX.
+        only_shard_idx = next(iter(shard_groups))
+        only_group = shard_groups[only_shard_idx]
+        _, all_results, total_files_found = _index_shard_group(
+            only_shard_idx, only_group, sg, full, embed, output_lock, False,
         )
-        total_files_found += last_result.files_found
-        if parse_state["shown"]:
-            click.echo()
-        if parse_state["total"] == 0 and not parse_state["printed_zero"]:
-            _phase("Parsing code...", "0/0")
-        elif parse_state["indexed"] < parse_state["total"]:
-            _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
+        if all_results:
+            last_result = all_results[-1]
     # ── Helper for in-place progress updates ────────────────────────────
     def _live_phase(label: str, status: str) -> None:
@@ -264,13 +436,18 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         """Finalise an in-place phase line and move to the next line."""
         click.echo(f"\r✓ {label:<28} {result:<48}")
+    # For cross-module operations (cross-module linking, deep analysis, stats)
+    # we use the shard store for the root project (all modules share one shard).
+    root_project_id = last_result.project_id if last_result else root_basename
+    root_shard_store = sg.shard(root_project_id)
     # ── Cross-module call linking ──────────────────────────────────────
     if is_multi and len(modules_with_ids) > 1:
         xmod_label = "Cross-module linking..."
         _live_phase(xmod_label, "running")
         xmod_pids = [pid for _, pid in modules_with_ids]
         xmod_edges = link_cross_module_calls(
-            store, project_ids=xmod_pids,
+            root_shard_store, project_ids=xmod_pids,
             progress=lambda s: _live_phase(xmod_label, s),
         )
         _finish_phase(xmod_label, f"{xmod_edges} cross-module call edges")
@@ -287,7 +464,7 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         comm_label = "Detecting communities..."
         _live_phase(comm_label, "running")
         communities = detect_communities(
-            store,
+            root_shard_store,
             progress=lambda s: _live_phase(comm_label, s),
         )
         _finish_phase(comm_label, f"{len(communities)} clusters found")
@@ -295,23 +472,23 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         flow_label = "Detecting execution flows..."
         _live_phase(flow_label, "running")
         flows = trace_execution_flows(
-            store,
+            root_shard_store,
             progress=lambda s: _live_phase(flow_label, s),
         )
         _finish_phase(flow_label, f"{len(flows)} processes found")
         dead_label = "Finding dead code..."
         _live_phase(dead_label, "running")
-        dead = detect_dead_code(store, limit=500)
+        dead = detect_dead_code(root_shard_store, limit=500)
         _finish_phase(dead_label, f"{_dead_result_count(dead)} unreachable symbols")
         coup_label = "Analyzing git history..."
         _live_phase(coup_label, "running")
-        store.clear_coupling()
+        root_shard_store.clear_coupling()
         coupling_root = abs_path
         coupling_project = root_basename if is_multi else (last_result.project_id if last_result else root_basename)
         coupling_pairs = compute_coupling(
-            store,
+            root_shard_store,
             coupling_root,
             coupling_project,
             days=SETTINGS.default_coupling_days,
@@ -329,7 +506,7 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         flow_label = "Detecting execution flows..."
         _live_phase(flow_label, "running (lightweight)")
         try:
-            flows = trace_execution_flows(store, max_depth=3)
+            flows = trace_execution_flows(root_shard_store, max_depth=3)
         except Exception:
             flows = []
         _finish_phase(flow_label, f"{len(flows)} flows (lightweight; rerun with --deep for full)")
@@ -337,14 +514,14 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         dead_label = "Finding dead code..."
         _live_phase(dead_label, "running (lightweight)")
         try:
-            dead = detect_dead_code(store, limit=100)
+            dead = detect_dead_code(root_shard_store, limit=100)
         except Exception:
             dead = []
         _finish_phase(dead_label, f"{_dead_result_count(dead)} candidates (lightweight; rerun with --deep for full)")
         _phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
-    vector_count = store.query_records(
+    vector_count = root_shard_store.query_records(
         """
         MATCH (s:Symbol)
         WHERE s.embedding IS NOT NULL
@@ -355,8 +532,8 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
     vectors_stored = int(vector_count[0]["count"]) if vector_count else embeddings_generated
     _phase("Generating embeddings...", f"{vectors_stored} vectors stored")
-    symbol_count = store.query_records("MATCH (s:Symbol) RETURN count(s) as count")
-    edge_count = store.query_records("MATCH ()-[r]->() RETURN count(r) as count")
+    symbol_count = root_shard_store.query_records("MATCH (s:Symbol) RETURN count(s) as count")
+    edge_count = root_shard_store.query_records("MATCH ()-[r]->() RETURN count(r) as count")
     symbols = int(symbol_count[0]["count"]) if symbol_count else 0
     edges = int(edge_count[0]["count"]) if edge_count else 0
     elapsed = time.perf_counter() - started
@@ -376,7 +553,7 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
     # Detect unresolved imports → hint about unindexed sibling projects
     try:
-        unresolved = JavaIndexer.detect_unresolved_imports(store)
+        unresolved = JavaIndexer.detect_unresolved_imports(root_shard_store)
         if unresolved:
             click.echo()
             click.secho("⚠  Unresolved imports — consider indexing these projects:", fg="yellow")
@@ -387,13 +564,12 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
     # Publish a read replica so MCP and read-only CLI commands (search, stats…)
     # run against an isolated snapshot rather than competing with the write
-    # process's buffer pool.  The MCP daemon detects the sentinel file and
-    # hot-reloads without restarting.
+    # process's buffer pool.  Snapshot all open shards concurrently.
     snap_label = "Publishing read replica..."
     _live_phase(snap_label, "copying")
-    store._recycle_conn()
-    snapped = GraphStore.snapshot_to_read_replica()
-    _finish_phase(snap_label, "MCP will reload automatically" if snapped else "skipped (source DB not found)")
+    root_shard_store._recycle_conn()
+    sg.snapshot_all(background=False)
+    _finish_phase(snap_label, "MCP will reload automatically")
 @main.command()
@@ -523,10 +699,21 @@ def diff(range_spec: str, as_json: bool) -> None:
 @main.command()
 @click.option("--json", "as_json", is_flag=True)
-def stats(as_json: bool) -> None:
+@click.option("--shards", "show_shards", is_flag=True, help="Show shard topology and load distribution.")
+def stats(as_json: bool, show_shards: bool) -> None:
     """Show per-project and aggregate graph statistics."""
-    store = GraphStore(read_only=True)
-    projects = store.query_records("MATCH (p:Project) RETURN p.id as id, p.path as path ORDER BY p.id")
+    if show_shards:
+        _show_shard_topology(as_json)
+        return
+    # Fan-out across all shards so stats covers every project in the cluster.
+    sg = ShardedGraphStore(read_only=True)
+    all_projects_meta = sg.list_project_metadata()
+    # For detailed stats we need the per-project shard store.
+    def _project_store(pid: str):
+        return sg.shard(pid)
     if not projects:
         click.secho("No projects indexed yet. Run 'codespine analyse <path>'.", fg="yellow")
         return
@@ -534,10 +721,12 @@ def stats(as_json: bool) -> None:
     rows = []
     for p in projects:
         pid = p["id"]
-        files = store.query_records(
+        # Route each query to the project's owning shard.
+        ps = _project_store(pid)
+        files = ps.query_records(
             "MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n", {"pid": pid}
         )
-        classes = store.query_records(
+        classes = ps.query_records(
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -546,7 +735,7 @@ def stats(as_json: bool) -> None:
             """,
             {"pid": pid},
         )
-        methods = store.query_records(
+        methods = ps.query_records(
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -557,7 +746,7 @@ def stats(as_json: bool) -> None:
             """,
             {"pid": pid},
         )
-        calls = store.query_records(
+        calls = ps.query_records(
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -568,7 +757,7 @@ def stats(as_json: bool) -> None:
             """,
             {"pid": pid},
         )
-        emb = store.query_records(
+        emb = ps.query_records(
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -580,6 +769,7 @@ def stats(as_json: bool) -> None:
         rows.append({
             "project": pid,
             "path": p["path"],
+            "shard": sg.router.shard_for(pid),
             "files": files[0]["n"] if files else 0,
             "classes": classes[0]["n"] if classes else 0,
             "methods": methods[0]["n"] if methods else 0,
@@ -592,13 +782,13 @@ def stats(as_json: bool) -> None:
         return
     col_w = max(len(r["project"]) for r in rows)
-    header = f"{'Project':<{col_w}}  {'Files':>6}  {'Classes':>8}  {'Methods':>8}  {'Calls':>7}  {'Emb':>6}  Path"
+    header = f"{'Project':<{col_w}}  {'Shard':>5}  {'Files':>6}  {'Classes':>8}  {'Methods':>8}  {'Calls':>7}  {'Emb':>6}  Path"
     click.secho(header, fg="cyan")
     click.echo("-" * len(header))
     total_files = total_classes = total_methods = total_calls = total_emb = 0
     for r in rows:
         click.echo(
-            f"{r['project']:<{col_w}}  {r['files']:>6}  {r['classes']:>8}  {r['methods']:>8}  {r['calls_out']:>7}  {r['embeddings']:>6}  {r['path']}"
+            f"{r['project']:<{col_w}}  {r.get('shard', 0):>5}  {r['files']:>6}  {r['classes']:>8}  {r['methods']:>8}  {r['calls_out']:>7}  {r['embeddings']:>6}  {r['path']}"
         )
         total_files += r["files"]
         total_classes += r["classes"]
@@ -608,7 +798,7 @@ def stats(as_json: bool) -> None:
     if len(rows) > 1:
         click.echo("-" * len(header))
         click.secho(
-            f"{'TOTAL':<{col_w}}  {total_files:>6}  {total_classes:>8}  {total_methods:>8}  {total_calls:>7}  {total_emb:>6}",
+            f"{'TOTAL':<{col_w}}  {'':>5}  {total_files:>6}  {total_classes:>8}  {total_methods:>8}  {total_calls:>7}  {total_emb:>6}",
             fg="green",
         )

{codespine-0.9.5 → codespine-0.9.7}/codespine/config.py RENAMED Viewed

@@ -4,8 +4,17 @@ from dataclasses import dataclass
 @dataclass(frozen=True)
 class Settings:
+    # Legacy single-DB paths — kept for backward compat and as defaults when
+    # sharding is disabled (num_shards == 1 or CODESPINE_SHARDS not set).
     db_path: str = os.path.expanduser("~/.codespine_db")
     db_snapshot_path: str = os.path.expanduser("~/.codespine_db_read")
+    # Sharding — new layout stores each shard under shards_dir/{N}/db
+    # num_shards: int, overridable via CODESPINE_SHARDS env var at runtime.
+    # ShardRouter reads CODESPINE_SHARDS directly; this field is the compiled default.
+    num_shards: int = 4
+    shards_dir: str = os.path.expanduser("~/.codespine/shards")
     pid_file: str = os.path.expanduser("~/.codespine.pid")
     log_file: str = os.path.expanduser("~/.codespine.log")
     embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")

codespine 0.9.5__tar.gz → 0.9.7__tar.gz

codespine 0.9.5__tar.gz → 0.9.7__tar.gz