codespine 0.9.8__tar.gz → 0.9.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {codespine-0.9.8 → codespine-0.9.9}/PKG-INFO +1 -1
  2. {codespine-0.9.8 → codespine-0.9.9}/codespine/__init__.py +1 -1
  3. codespine-0.9.9/codespine/cache/__init__.py +4 -0
  4. codespine-0.9.9/codespine/cache/result_cache.py +167 -0
  5. {codespine-0.9.8 → codespine-0.9.9}/codespine/cli.py +39 -3
  6. {codespine-0.9.8 → codespine-0.9.9}/codespine/mcp/server.py +230 -5
  7. {codespine-0.9.8 → codespine-0.9.9}/codespine/search/vector.py +21 -4
  8. {codespine-0.9.8 → codespine-0.9.9}/codespine.egg-info/PKG-INFO +1 -1
  9. {codespine-0.9.8 → codespine-0.9.9}/codespine.egg-info/SOURCES.txt +3 -0
  10. {codespine-0.9.8 → codespine-0.9.9}/pyproject.toml +1 -1
  11. codespine-0.9.9/tests/test_result_cache.py +179 -0
  12. {codespine-0.9.8 → codespine-0.9.9}/LICENSE +0 -0
  13. {codespine-0.9.8 → codespine-0.9.9}/README.md +0 -0
  14. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/__init__.py +0 -0
  15. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/community.py +0 -0
  16. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/context.py +0 -0
  17. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/coupling.py +0 -0
  18. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/crossmodule.py +0 -0
  19. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/deadcode.py +0 -0
  20. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/flow.py +0 -0
  21. {codespine-0.9.8 → codespine-0.9.9}/codespine/analysis/impact.py +0 -0
  22. {codespine-0.9.8 → codespine-0.9.9}/codespine/config.py +0 -0
  23. {codespine-0.9.8 → codespine-0.9.9}/codespine/db/__init__.py +0 -0
  24. {codespine-0.9.8 → codespine-0.9.9}/codespine/db/duckdb_store.py +0 -0
  25. {codespine-0.9.8 → codespine-0.9.9}/codespine/db/schema.py +0 -0
  26. {codespine-0.9.8 → codespine-0.9.9}/codespine/db/store.py +0 -0
  27. {codespine-0.9.8 → codespine-0.9.9}/codespine/diff/__init__.py +0 -0
  28. {codespine-0.9.8 → codespine-0.9.9}/codespine/diff/branch_diff.py +0 -0
  29. {codespine-0.9.8 → codespine-0.9.9}/codespine/guide.py +0 -0
  30. {codespine-0.9.8 → codespine-0.9.9}/codespine/indexer/__init__.py +0 -0
  31. {codespine-0.9.8 → codespine-0.9.9}/codespine/indexer/call_resolver.py +0 -0
  32. {codespine-0.9.8 → codespine-0.9.9}/codespine/indexer/di_resolver.py +0 -0
  33. {codespine-0.9.8 → codespine-0.9.9}/codespine/indexer/engine.py +0 -0
  34. {codespine-0.9.8 → codespine-0.9.9}/codespine/indexer/java_parser.py +0 -0
  35. {codespine-0.9.8 → codespine-0.9.9}/codespine/indexer/symbol_builder.py +0 -0
  36. {codespine-0.9.8 → codespine-0.9.9}/codespine/mcp/__init__.py +0 -0
  37. {codespine-0.9.8 → codespine-0.9.9}/codespine/noise/__init__.py +0 -0
  38. {codespine-0.9.8 → codespine-0.9.9}/codespine/noise/blocklist.py +0 -0
  39. {codespine-0.9.8 → codespine-0.9.9}/codespine/overlay/__init__.py +0 -0
  40. {codespine-0.9.8 → codespine-0.9.9}/codespine/overlay/git_state.py +0 -0
  41. {codespine-0.9.8 → codespine-0.9.9}/codespine/overlay/merge.py +0 -0
  42. {codespine-0.9.8 → codespine-0.9.9}/codespine/overlay/store.py +0 -0
  43. {codespine-0.9.8 → codespine-0.9.9}/codespine/search/__init__.py +0 -0
  44. {codespine-0.9.8 → codespine-0.9.9}/codespine/search/bm25.py +0 -0
  45. {codespine-0.9.8 → codespine-0.9.9}/codespine/search/fuzzy.py +0 -0
  46. {codespine-0.9.8 → codespine-0.9.9}/codespine/search/hybrid.py +0 -0
  47. {codespine-0.9.8 → codespine-0.9.9}/codespine/search/rrf.py +0 -0
  48. {codespine-0.9.8 → codespine-0.9.9}/codespine/sharding/__init__.py +0 -0
  49. {codespine-0.9.8 → codespine-0.9.9}/codespine/sharding/router.py +0 -0
  50. {codespine-0.9.8 → codespine-0.9.9}/codespine/sharding/store.py +0 -0
  51. {codespine-0.9.8 → codespine-0.9.9}/codespine/watch/__init__.py +0 -0
  52. {codespine-0.9.8 → codespine-0.9.9}/codespine/watch/git_hook.py +0 -0
  53. {codespine-0.9.8 → codespine-0.9.9}/codespine/watch/watcher.py +0 -0
  54. {codespine-0.9.8 → codespine-0.9.9}/codespine.egg-info/dependency_links.txt +0 -0
  55. {codespine-0.9.8 → codespine-0.9.9}/codespine.egg-info/entry_points.txt +0 -0
  56. {codespine-0.9.8 → codespine-0.9.9}/codespine.egg-info/requires.txt +0 -0
  57. {codespine-0.9.8 → codespine-0.9.9}/codespine.egg-info/top_level.txt +0 -0
  58. {codespine-0.9.8 → codespine-0.9.9}/gindex.py +0 -0
  59. {codespine-0.9.8 → codespine-0.9.9}/setup.cfg +0 -0
  60. {codespine-0.9.8 → codespine-0.9.9}/tests/test_branch_diff_normalize.py +0 -0
  61. {codespine-0.9.8 → codespine-0.9.9}/tests/test_call_resolver.py +0 -0
  62. {codespine-0.9.8 → codespine-0.9.9}/tests/test_community_detection.py +0 -0
  63. {codespine-0.9.8 → codespine-0.9.9}/tests/test_deadcode.py +0 -0
  64. {codespine-0.9.8 → codespine-0.9.9}/tests/test_duckdb_store.py +0 -0
  65. {codespine-0.9.8 → codespine-0.9.9}/tests/test_index_and_hybrid.py +0 -0
  66. {codespine-0.9.8 → codespine-0.9.9}/tests/test_java_parser.py +0 -0
  67. {codespine-0.9.8 → codespine-0.9.9}/tests/test_multimodule_index.py +0 -0
  68. {codespine-0.9.8 → codespine-0.9.9}/tests/test_overlay.py +0 -0
  69. {codespine-0.9.8 → codespine-0.9.9}/tests/test_search_ranking.py +0 -0
  70. {codespine-0.9.8 → codespine-0.9.9}/tests/test_sharding.py +0 -0
  71. {codespine-0.9.8 → codespine-0.9.9}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.8
3
+ Version: 0.9.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.9.8"
4
+ __version__ = "0.9.9"
@@ -0,0 +1,4 @@
1
+ """Result caching for MCP tools."""
2
+ from codespine.cache.result_cache import ResultCache
3
+
4
+ __all__ = ["ResultCache"]
@@ -0,0 +1,167 @@
1
+ """LRU result cache for CodeSpine MCP tools.
2
+
3
+ Avoids recomputing expensive analyses (impact BFS, dead-code scan, community
4
+ lookup) when the same arguments are passed and the underlying index hasn't
5
+ changed since the last call.
6
+
7
+ Cache key: ``(tool_name, args_hash, snapshot_mtime_rounded)``
8
+ - ``tool_name`` — the MCP tool that produced the result
9
+ - ``args_hash`` — SHA-1 of the JSON-serialised arguments (sorted keys)
10
+ - ``snapshot_mtime_rounded`` — read-replica mtime rounded to 1 s, so a new
11
+ snapshot invalidates all cached results for the affected store
12
+
13
+ TTL: entries are evicted after ``ttl_s`` seconds (default 300 s / 5 min) even
14
+ if the cache isn't full, preventing stale results across long sessions.
15
+
16
+ Usage
17
+ -----
18
+ from codespine.cache.result_cache import ResultCache
19
+
20
+ _cache = ResultCache(maxsize=256, ttl_s=300.0)
21
+
22
+ key = _cache.make_key("get_impact", {"symbol": "Foo", "project": "myapp"}, mtime)
23
+ cached = _cache.get(key)
24
+ if cached is not None:
25
+ return cached
26
+ result = expensive_computation(...)
27
+ _cache.put(key, result)
28
+ return result
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import hashlib
34
+ import json
35
+ import threading
36
+ import time
37
+ from collections import OrderedDict
38
+ from typing import Any
39
+
40
+
41
+ class ResultCache:
42
+ """Thread-safe LRU cache for pre-serialised JSON tool results.
43
+
44
+ Parameters
45
+ ----------
46
+ maxsize:
47
+ Maximum number of entries to keep. Oldest entry is evicted when
48
+ the cache is full (LRU eviction).
49
+ ttl_s:
50
+ Time-to-live in seconds. Entries older than this are treated as
51
+ missing even if they're still in the cache.
52
+ """
53
+
54
+ def __init__(self, maxsize: int = 256, ttl_s: float = 300.0) -> None:
55
+ self._maxsize = maxsize
56
+ self._ttl = ttl_s
57
+ # OrderedDict preserves insertion order: oldest → newest
58
+ self._cache: OrderedDict[tuple, tuple[str, float]] = OrderedDict()
59
+ self._lock = threading.Lock()
60
+ self._hits = 0
61
+ self._misses = 0
62
+
63
+ # ------------------------------------------------------------------
64
+ # Key construction
65
+ # ------------------------------------------------------------------
66
+
67
+ @staticmethod
68
+ def make_key(
69
+ tool_name: str,
70
+ args: dict[str, Any],
71
+ snapshot_mtime: float,
72
+ ) -> tuple:
73
+ """Build a cache key from tool name, arguments, and index timestamp.
74
+
75
+ Parameters
76
+ ----------
77
+ tool_name:
78
+ Name of the MCP tool (e.g. ``"get_impact"``).
79
+ args:
80
+ Tool arguments dict (``None`` values included so missing optional
81
+ args don't collide with explicitly-set ones).
82
+ snapshot_mtime:
83
+ Last-modified time of the read-replica sentinel file, rounded to
84
+ 1-second precision. A new snapshot invalidates old entries.
85
+ """
86
+ try:
87
+ args_bytes = json.dumps(args, sort_keys=True, default=str).encode()
88
+ except Exception:
89
+ args_bytes = str(args).encode()
90
+ args_hash = hashlib.sha1(args_bytes).hexdigest()[:16]
91
+ return (tool_name, args_hash, round(snapshot_mtime, 0))
92
+
93
+ # ------------------------------------------------------------------
94
+ # Cache operations
95
+ # ------------------------------------------------------------------
96
+
97
+ def get(self, key: tuple) -> str | None:
98
+ """Return the cached value for *key*, or ``None`` if missing/expired."""
99
+ with self._lock:
100
+ if key not in self._cache:
101
+ self._misses += 1
102
+ return None
103
+ value, inserted_at = self._cache[key]
104
+ if time.monotonic() - inserted_at > self._ttl:
105
+ del self._cache[key]
106
+ self._misses += 1
107
+ return None
108
+ # Promote to most-recently-used position.
109
+ self._cache.move_to_end(key)
110
+ self._hits += 1
111
+ return value
112
+
113
+ def put(self, key: tuple, value: str) -> None:
114
+ """Store *value* under *key*. Evicts LRU entry if cache is full."""
115
+ with self._lock:
116
+ if key in self._cache:
117
+ self._cache.move_to_end(key)
118
+ self._cache[key] = (value, time.monotonic())
119
+ # Evict oldest entries until we're within maxsize.
120
+ while len(self._cache) > self._maxsize:
121
+ self._cache.popitem(last=False)
122
+
123
+ def invalidate(self) -> int:
124
+ """Clear the entire cache. Call after any index mutation.
125
+
126
+ Returns the number of entries evicted.
127
+ """
128
+ with self._lock:
129
+ n = len(self._cache)
130
+ self._cache.clear()
131
+ return n
132
+
133
+ def invalidate_tool(self, tool_name: str) -> int:
134
+ """Evict all entries for a specific tool.
135
+
136
+ Returns the number of entries removed.
137
+ """
138
+ with self._lock:
139
+ keys_to_remove = [k for k in self._cache if k[0] == tool_name]
140
+ for k in keys_to_remove:
141
+ del self._cache[k]
142
+ return len(keys_to_remove)
143
+
144
+ # ------------------------------------------------------------------
145
+ # Stats / introspection
146
+ # ------------------------------------------------------------------
147
+
148
+ def stats(self) -> dict[str, Any]:
149
+ """Return cache statistics (size, hit/miss counts, hit rate)."""
150
+ with self._lock:
151
+ total = self._hits + self._misses
152
+ return {
153
+ "size": len(self._cache),
154
+ "maxsize": self._maxsize,
155
+ "ttl_s": self._ttl,
156
+ "hits": self._hits,
157
+ "misses": self._misses,
158
+ "hit_rate": round(self._hits / total, 3) if total else 0.0,
159
+ }
160
+
161
+ def __repr__(self) -> str: # pragma: no cover
162
+ s = self.stats()
163
+ return (
164
+ f"ResultCache(size={s['size']}/{s['maxsize']}, "
165
+ f"hits={s['hits']}, misses={s['misses']}, "
166
+ f"hit_rate={s['hit_rate']:.1%})"
167
+ )
@@ -293,7 +293,13 @@ def main() -> None:
293
293
  @main.command()
294
294
  @click.argument("path", type=click.Path(exists=True))
295
295
  @click.option("--full/--incremental", default=False, show_default=True)
296
- @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
296
+ @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses (auto-on for repos ≤3 k files).")
297
+ @click.option(
298
+ "--incremental-deep",
299
+ is_flag=True,
300
+ default=False,
301
+ help="Force deep analysis even during incremental re-index. Useful after large refactors.",
302
+ )
297
303
  @click.option(
298
304
  "--embed/--no-embed",
299
305
  default=True,
@@ -301,7 +307,7 @@ def main() -> None:
301
307
  help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
302
308
  )
303
309
  @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
304
- def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool) -> None:
310
+ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bool, allow_running: bool) -> None:
305
311
  """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
306
312
 
307
313
  Embeddings are generated by default. If sentence-transformers is installed
@@ -459,7 +465,7 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
459
465
  dead: list[dict] = []
460
466
  coupling_pairs: list[dict] = []
461
467
 
462
- should_run_deep = deep or total_files_found <= 1200
468
+ should_run_deep = deep or incremental_deep or total_files_found <= 3000
463
469
  if should_run_deep:
464
470
  comm_label = "Detecting communities..."
465
471
  _live_phase(comm_label, "running")
@@ -1138,6 +1144,36 @@ def stop() -> None:
1138
1144
  os.remove(SETTINGS.pid_file)
1139
1145
 
1140
1146
 
1147
+ @main.command("install-model")
1148
+ def install_model() -> None:
1149
+ """Download and cache the sentence-transformers embedding model.
1150
+
1151
+ Requires 'pip install codespine[ml]'. The model is downloaded once and
1152
+ cached locally; subsequent analyse runs use the cache without network access.
1153
+ """
1154
+ try:
1155
+ from sentence_transformers import SentenceTransformer # noqa: F401
1156
+ except ImportError:
1157
+ click.secho(
1158
+ "sentence-transformers is not installed.\n"
1159
+ "Run: pip install codespine[ml]",
1160
+ fg="red",
1161
+ )
1162
+ return
1163
+
1164
+ model_name = SETTINGS.embedding_model
1165
+ click.secho(f"Downloading model '{model_name}' …", fg="cyan")
1166
+ try:
1167
+ from sentence_transformers import SentenceTransformer
1168
+
1169
+ model = SentenceTransformer(model_name)
1170
+ # Run a tiny inference to confirm the model is usable.
1171
+ _ = model.encode(["hello world"])
1172
+ click.secho(f"✓ Model '{model_name}' ready. Semantic search is now enabled.", fg="green")
1173
+ except Exception as exc:
1174
+ click.secho(f"✗ Failed to load model: {exc}", fg="red")
1175
+
1176
+
1141
1177
  @main.command("run-mcp", hidden=True)
1142
1178
  def run_mcp() -> None:
1143
1179
  """Run MCP server in stdio mode."""
@@ -30,6 +30,7 @@ from codespine.watch.watcher import (
30
30
  get_overlay_status as get_overlay_status_state,
31
31
  promote_overlay as promote_overlay_state,
32
32
  )
33
+ from codespine.cache.result_cache import ResultCache
33
34
 
34
35
 
35
36
  def _json(data: dict) -> str:
@@ -271,6 +272,49 @@ def build_mcp_server(store, repo_path_provider):
271
272
  _watch: dict = {"proc": None, "path": None, "started_at": None, "interval": 30}
272
273
  _analyse: dict = {"proc": None, "path": None, "started_at": None, "log_path": None, "returncode": None}
273
274
 
275
+ # Per-server result cache (FR-12): LRU cache keyed by (tool, args_hash, snapshot_mtime).
276
+ # Invalidated automatically when the read replica sentinel file changes.
277
+ _result_cache = ResultCache(maxsize=256, ttl_s=300.0)
278
+
279
+ def _cache_key(tool_name: str, **kwargs):
280
+ """Build a cache key using current snapshot mtime."""
281
+ try:
282
+ sentinel = getattr(store, "_snapshot_path", "") + ".updated"
283
+ mtime = os.path.getmtime(sentinel) if os.path.exists(sentinel) else 0.0
284
+ except OSError:
285
+ mtime = 0.0
286
+ return ResultCache.make_key(tool_name, kwargs, mtime)
287
+
288
+ # FR-03: Auto-start watch if indexed projects exist and watch is not running.
289
+ def _maybe_auto_start_watch() -> None:
290
+ try:
291
+ projs = store.query_records(
292
+ "MATCH (p:Project) RETURN p.path as path, p.id as id ORDER BY p.indexed_at DESC LIMIT 1"
293
+ )
294
+ if not projs:
295
+ return
296
+ watch_path = projs[0].get("path", "")
297
+ if not watch_path or not os.path.isdir(watch_path):
298
+ return
299
+ # Start watch as a background subprocess (same as start_watch tool).
300
+ cmd = [sys.executable, "-m", "codespine.cli", "watch", watch_path,
301
+ "--interval", "30", "--allow-running"]
302
+ proc = subprocess.Popen(
303
+ cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
304
+ start_new_session=True,
305
+ )
306
+ _watch["proc"] = proc
307
+ _watch["path"] = watch_path
308
+ _watch["started_at"] = time.time()
309
+ _LOGGER.info("Auto-started watch on %s (pid %d)", watch_path, proc.pid)
310
+ except Exception as exc:
311
+ _LOGGER.debug("Auto-watch skipped: %s", exc)
312
+
313
+ # Trigger auto-watch in a daemon thread so server startup isn't delayed.
314
+ import threading as _threading
315
+ _auto_watch_thread = _threading.Thread(target=_maybe_auto_start_watch, daemon=True, name="codespine-auto-watch")
316
+ _auto_watch_thread.start()
317
+
274
318
  # ------------------------------------------------------------------
275
319
  # Connectivity / feature discovery
276
320
  # ------------------------------------------------------------------
@@ -526,6 +570,10 @@ def build_mcp_server(store, repo_path_provider):
526
570
  project scopes the target symbol lookup; cross-project callers are always included.
527
571
  """
528
572
  try:
573
+ _ck = _cache_key("get_impact", symbol=symbol, max_depth=max_depth, project=project)
574
+ _cached = _result_cache.get(_ck)
575
+ if _cached is not None:
576
+ return _cached
529
577
  normalized = _normalize_symbol_input(symbol)
530
578
  result = analyze_impact(store, normalized, max_depth=max_depth, project=project)
531
579
  if not result.get("resolved_to"):
@@ -533,7 +581,9 @@ def build_mcp_server(store, repo_path_provider):
533
581
  result = analyze_impact(store, symbol, max_depth=max_depth, project=project)
534
582
  if not result.get("resolved_to"):
535
583
  return {"available": False, "note": f"Symbol '{symbol}' not found in the index."}
536
- return _staleness_meta(store, {"available": True, **result}, project, overlay_store=overlay_store)
584
+ out = _staleness_meta(store, {"available": True, **result}, project, overlay_store=overlay_store)
585
+ _result_cache.put(_ck, out)
586
+ return out
537
587
  except Exception as exc:
538
588
  return _safe_tool_response("get_impact", exc)
539
589
 
@@ -558,6 +608,11 @@ def build_mcp_server(store, repo_path_provider):
558
608
  exemption rules — useful for validating that the feature is working
559
609
  even when the dead list is empty.
560
610
  """
611
+ _ck = _cache_key("detect_dead_code", limit=limit, project=project, strict=strict)
612
+ _cached = _result_cache.get(_ck)
613
+ if _cached is not None:
614
+ return _cached
615
+
561
616
  raw = detect_dead_code_analysis(store, limit=limit, project=project, strict=strict)
562
617
  if raw is None:
563
618
  return _no_symbols_response()
@@ -571,12 +626,14 @@ def build_mcp_server(store, repo_path_provider):
571
626
  else:
572
627
  dead.append(entry)
573
628
 
574
- return _staleness_meta(store, {
629
+ out = _staleness_meta(store, {
575
630
  "available": True,
576
631
  "dead_code": dead,
577
632
  "count": len(dead),
578
633
  "exemption_stats": stats,
579
634
  }, project, overlay_store=overlay_store, deep_scope=True)
635
+ _result_cache.put(_ck, out)
636
+ return out
580
637
 
581
638
  @mcp.tool()
582
639
  def trace_execution_flows(entry_symbol: str | None = None, max_depth: int = 6, project: str | None = None):
@@ -1722,6 +1779,12 @@ def build_mcp_server(store, repo_path_provider):
1722
1779
  enriched.append(entry)
1723
1780
  return enriched
1724
1781
 
1782
+ target_pid = target["project_id"]
1783
+ enriched_callers = _enrich(callers, extra_keys=["confidence", "reason"])
1784
+ # FR-11: label cross-project callers so consumers can separate them.
1785
+ local_callers = [c for c in enriched_callers if c.get("project_id") == target_pid]
1786
+ cross_project_callers = [c for c in enriched_callers if c.get("project_id") != target_pid]
1787
+
1725
1788
  result = {
1726
1789
  "available": True,
1727
1790
  "target": {
@@ -1730,9 +1793,10 @@ def build_mcp_server(store, repo_path_provider):
1730
1793
  "signature": target["signature"],
1731
1794
  "class_fqcn": target["class_fqcn"],
1732
1795
  "file_path": target["file_path"],
1733
- "project_id": target["project_id"],
1796
+ "project_id": target_pid,
1734
1797
  },
1735
- "callers": _enrich(callers, extra_keys=["confidence", "reason"]),
1798
+ "callers": local_callers,
1799
+ "cross_project_callers": cross_project_callers,
1736
1800
  "callees": _enrich(callees, extra_keys=["confidence", "reason"]),
1737
1801
  "siblings": [
1738
1802
  {"name": s["name"], "signature": s["signature"]}
@@ -1741,7 +1805,8 @@ def build_mcp_server(store, repo_path_provider):
1741
1805
  "overrides": _enrich(overrides_up),
1742
1806
  "overridden_by": _enrich(overrides_down),
1743
1807
  "summary": {
1744
- "callers": len(callers),
1808
+ "callers": len(local_callers),
1809
+ "cross_project_callers": len(cross_project_callers),
1745
1810
  "callees": len(callees),
1746
1811
  "siblings": len(siblings),
1747
1812
  "overrides": len(overrides_up),
@@ -2668,6 +2733,166 @@ def build_mcp_server(store, repo_path_provider):
2668
2733
  except Exception as exc:
2669
2734
  return _safe_tool_response("find_pattern", exc)
2670
2735
 
2736
+ # ------------------------------------------------------------------
2737
+ # rename_plan (FR-11 / Phase 5)
2738
+ # ------------------------------------------------------------------
2739
+
2740
+ @mcp.tool()
2741
+ def rename_plan(symbol: str, new_name: str, project: str | None = None):
2742
+ """
2743
+ Safe cross-project rename plan for a method, class, or field.
2744
+
2745
+ Finds all references to the symbol (callers, overrides, interface
2746
+ declarations, direct mentions) and returns a structured list of
2747
+ files_to_modify with the current text and suggested replacement.
2748
+
2749
+ This tool does NOT modify files — it produces a plan that you (or your
2750
+ editor) can apply. Review the plan before making changes.
2751
+
2752
+ Parameters:
2753
+ symbol – Method name, class name, or FQN to rename.
2754
+ new_name – The desired new name (simple name, not FQN).
2755
+ project – Optional project scope for the initial symbol lookup.
2756
+ """
2757
+ try:
2758
+ normalized = _normalize_symbol_input(symbol)
2759
+ project_clause = "AND f.project_id = $proj" if project else ""
2760
+ params: dict = {"q": normalized, "raw": symbol}
2761
+ if project:
2762
+ params["proj"] = project
2763
+
2764
+ # 1. Resolve to methods + classes with the given name.
2765
+ method_recs = store.query_records(
2766
+ f"""
2767
+ MATCH (m:Method), (c:Class), (f:File)
2768
+ WHERE m.class_id = c.id AND c.file_id = f.id {project_clause}
2769
+ AND (m.id = $q OR m.id = $raw
2770
+ OR lower(m.name) = lower($q)
2771
+ OR lower(m.signature) CONTAINS lower($q))
2772
+ RETURN m.id as id, m.name as name, m.signature as signature,
2773
+ c.fqcn as class_fqcn, f.path as file_path,
2774
+ f.project_id as project_id, 'method' as kind
2775
+ LIMIT 20
2776
+ """,
2777
+ params,
2778
+ )
2779
+ class_recs = store.query_records(
2780
+ f"""
2781
+ MATCH (c:Class), (f:File)
2782
+ WHERE c.file_id = f.id {project_clause}
2783
+ AND (c.id = $q OR c.id = $raw
2784
+ OR lower(c.name) = lower($q) OR lower(c.fqcn) = lower($q))
2785
+ RETURN c.id as id, c.name as name, c.fqcn as class_fqcn,
2786
+ f.path as file_path, f.project_id as project_id, 'class' as kind
2787
+ LIMIT 10
2788
+ """,
2789
+ params,
2790
+ )
2791
+
2792
+ all_targets = method_recs + class_recs
2793
+ if not all_targets:
2794
+ return {
2795
+ "available": False,
2796
+ "note": f"Symbol '{symbol}' not found. Try find_symbol() first.",
2797
+ }
2798
+
2799
+ # 2. Collect all files that declare or reference the targets.
2800
+ target_ids = [r["id"] for r in all_targets]
2801
+ declaration_files: dict[str, dict] = {} # file_path → info
2802
+
2803
+ # Declaration sites.
2804
+ for rec in all_targets:
2805
+ fp = rec.get("file_path", "")
2806
+ if fp:
2807
+ declaration_files.setdefault(fp, {
2808
+ "file_path": fp,
2809
+ "project_id": rec.get("project_id"),
2810
+ "changes": [],
2811
+ })["changes"].append({
2812
+ "kind": "declaration",
2813
+ "symbol_kind": rec.get("kind", "method"),
2814
+ "current_name": rec.get("name", symbol),
2815
+ "suggested_name": new_name,
2816
+ "note": f"Rename {rec.get('kind','method')} declaration",
2817
+ })
2818
+
2819
+ # Caller sites (only methods have call sites).
2820
+ method_ids = [r["id"] for r in method_recs]
2821
+ if method_ids:
2822
+ ph = ", ".join("$mid" + str(i) for i in range(len(method_ids)))
2823
+ caller_params = {f"mid{i}": v for i, v in enumerate(method_ids)}
2824
+ callers = store.query_records(
2825
+ f"""
2826
+ MATCH (caller:Method)-[:CALLS]->(m:Method), (c:Class), (f:File)
2827
+ WHERE m.id IN [{ph}]
2828
+ AND caller.class_id = c.id AND c.file_id = f.id
2829
+ RETURN DISTINCT f.path as file_path, f.project_id as project_id,
2830
+ caller.name as caller_name
2831
+ LIMIT 100
2832
+ """,
2833
+ caller_params,
2834
+ )
2835
+ for cr in callers:
2836
+ fp = cr.get("file_path", "")
2837
+ if fp:
2838
+ declaration_files.setdefault(fp, {
2839
+ "file_path": fp,
2840
+ "project_id": cr.get("project_id"),
2841
+ "changes": [],
2842
+ })["changes"].append({
2843
+ "kind": "call_site",
2844
+ "current_text": symbol,
2845
+ "suggested_text": new_name,
2846
+ "note": f"Call site in {cr.get('caller_name','<unknown>')}",
2847
+ })
2848
+
2849
+ # Override sites.
2850
+ for method_id in method_ids:
2851
+ overrides = store.query_records(
2852
+ """
2853
+ MATCH (child:Method)-[:OVERRIDES]->(m:Method {id: $mid}),
2854
+ (cc:Class), (ff:File)
2855
+ WHERE child.class_id = cc.id AND cc.file_id = ff.id
2856
+ RETURN ff.path as file_path, ff.project_id as project_id,
2857
+ child.name as name
2858
+ """,
2859
+ {"mid": method_id},
2860
+ )
2861
+ for ov in overrides:
2862
+ fp = ov.get("file_path", "")
2863
+ if fp:
2864
+ declaration_files.setdefault(fp, {
2865
+ "file_path": fp,
2866
+ "project_id": ov.get("project_id"),
2867
+ "changes": [],
2868
+ })["changes"].append({
2869
+ "kind": "override",
2870
+ "current_name": ov.get("name", symbol),
2871
+ "suggested_name": new_name,
2872
+ "note": "Override — must match new name",
2873
+ })
2874
+
2875
+ files_to_modify = sorted(
2876
+ declaration_files.values(), key=lambda x: x.get("file_path", "")
2877
+ )
2878
+ projects_affected = {f["project_id"] for f in files_to_modify if f.get("project_id")}
2879
+
2880
+ return _staleness_meta(store, {
2881
+ "available": True,
2882
+ "symbol": symbol,
2883
+ "new_name": new_name,
2884
+ "targets_found": len(all_targets),
2885
+ "files_to_modify": files_to_modify,
2886
+ "files_count": len(files_to_modify),
2887
+ "projects_affected": list(projects_affected),
2888
+ "note": (
2889
+ f"Found {len(files_to_modify)} files to update. "
2890
+ "This is a plan only — no files have been changed."
2891
+ ),
2892
+ }, project, overlay_store=overlay_store)
2893
+ except Exception as exc:
2894
+ return _safe_tool_response("rename_plan", exc)
2895
+
2671
2896
  # ------------------------------------------------------------------
2672
2897
  # Advanced / raw access
2673
2898
  # ------------------------------------------------------------------
@@ -11,13 +11,30 @@ from codespine.config import SETTINGS
11
11
 
12
12
 
13
13
  def _hash_vector(text: str, dim: int) -> list[float]:
14
- """Deterministic fallback embedding when sentence-transformers is unavailable."""
14
+ """Deterministic fallback embedding when sentence-transformers is unavailable.
15
+
16
+ Uses character n-grams (bi-, tri-, and quad-grams) plus full word tokens for
17
+ significantly better score calibration compared to word-only hashing. Similar
18
+ identifiers (e.g. ``getUserById`` vs ``getUserByName``) now land closer in
19
+ vector space than they would with whole-word tokens alone.
20
+ """
15
21
  vec = [0.0] * dim
16
22
  if not text:
17
23
  return vec
18
- tokens = text.lower().split()
19
- for token in tokens:
20
- digest = hashlib.sha1(token.encode("utf-8")).digest()
24
+ normalized = text.lower()
25
+
26
+ features: list[str] = []
27
+ # Include whole words (split on camelCase boundaries too)
28
+ import re as _re
29
+ words = _re.sub(r"([a-z])([A-Z])", r"\1 \2", text).lower().split()
30
+ features.extend(words)
31
+ # Character n-grams (bigrams, trigrams, quadgrams) over normalized text
32
+ for n in (2, 3, 4):
33
+ for i in range(len(normalized) - n + 1):
34
+ features.append(normalized[i : i + n])
35
+
36
+ for feat in features:
37
+ digest = hashlib.sha1(feat.encode("utf-8")).digest()
21
38
  idx = int.from_bytes(digest[:2], "big") % dim
22
39
  sign = 1.0 if digest[2] % 2 == 0 else -1.0
23
40
  vec[idx] += sign
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.8
3
+ Version: 0.9.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -20,6 +20,8 @@ codespine/analysis/crossmodule.py
20
20
  codespine/analysis/deadcode.py
21
21
  codespine/analysis/flow.py
22
22
  codespine/analysis/impact.py
23
+ codespine/cache/__init__.py
24
+ codespine/cache/result_cache.py
23
25
  codespine/db/__init__.py
24
26
  codespine/db/duckdb_store.py
25
27
  codespine/db/schema.py
@@ -61,6 +63,7 @@ tests/test_index_and_hybrid.py
61
63
  tests/test_java_parser.py
62
64
  tests/test_multimodule_index.py
63
65
  tests/test_overlay.py
66
+ tests/test_result_cache.py
64
67
  tests/test_search_ranking.py
65
68
  tests/test_sharding.py
66
69
  tests/test_store_recovery.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.9.8"
7
+ version = "0.9.9"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,179 @@
1
+ """Tests for codespine.cache.result_cache.ResultCache."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+
7
+ import pytest
8
+
9
+ from codespine.cache.result_cache import ResultCache
10
+
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # make_key
14
+ # ---------------------------------------------------------------------------
15
+
16
+
17
def test_make_key_same_args_same_key():
    """Identical tool, args, and mtime must derive the identical key."""
    first = ResultCache.make_key("get_impact", {"symbol": "Foo", "project": None}, 1000.0)
    second = ResultCache.make_key("get_impact", {"symbol": "Foo", "project": None}, 1000.0)
    assert first == second
21
+
22
+
23
def test_make_key_different_tool():
    """Keys for distinct tool names must not collide."""
    derived = {
        ResultCache.make_key(tool, {"symbol": "Foo"}, 1000.0)
        for tool in ("get_impact", "detect_dead_code")
    }
    assert len(derived) == 2
27
+
28
+
29
def test_make_key_different_mtime():
    """A changed index mtime must produce a fresh cache key."""
    older = ResultCache.make_key("get_impact", {"symbol": "Foo"}, 1000.0)
    newer = ResultCache.make_key("get_impact", {"symbol": "Foo"}, 1001.0)
    assert older != newer
33
+
34
+
35
def test_make_key_different_args():
    """Different tool arguments must map to different keys."""
    for_foo = ResultCache.make_key("search", {"query": "Foo"}, 1000.0)
    for_bar = ResultCache.make_key("search", {"query": "Bar"}, 1000.0)
    assert for_foo != for_bar
39
+
40
+
41
def test_make_key_arg_order_independent():
    """Key derivation canonicalizes argument order (sorted keys in json.dumps)."""
    forward = ResultCache.make_key("t", {"a": 1, "b": 2}, 0.0)
    backward = ResultCache.make_key("t", {"b": 2, "a": 1}, 0.0)
    assert forward == backward
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # get / put / hit
49
+ # ---------------------------------------------------------------------------
50
+
51
+
52
def test_cache_miss_returns_none():
    """Looking up a key that was never stored yields None."""
    cache = ResultCache()
    assert cache.get(("x", "y", 0.0)) is None
55
+
56
+
57
def test_cache_put_and_get():
    """A stored payload is returned verbatim on lookup."""
    cache = ResultCache()
    key = ResultCache.make_key("t", {}, 0.0)
    payload = '{"result": 1}'
    cache.put(key, payload)
    assert cache.get(key) == payload
62
+
63
+
64
def test_cache_hit_stat():
    """A successful lookup increments hits while misses stay at zero."""
    cache = ResultCache()
    key = ResultCache.make_key("t", {}, 0.0)
    cache.put(key, "v")
    cache.get(key)
    stats = cache.stats()
    assert stats["hits"] == 1
    assert stats["misses"] == 0
72
+
73
+
74
def test_cache_miss_stat():
    """A failed lookup increments misses while hits stay at zero."""
    cache = ResultCache()
    cache.get(("x", "y", 0.0))
    stats = cache.stats()
    assert stats["misses"] == 1
    assert stats["hits"] == 0
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # LRU eviction
84
+ # ---------------------------------------------------------------------------
85
+
86
+
87
def test_lru_eviction():
    """Inserting past maxsize drops the least-recently-used entry."""
    cache = ResultCache(maxsize=3)
    keys = [ResultCache.make_key("t", {"i": i}, 0.0) for i in range(4)]
    for i, key in enumerate(keys):
        cache.put(key, str(i))
    # The oldest entry (i=0) must be gone; the newest (i=3) must remain.
    assert cache.get(keys[0]) is None
    assert cache.get(keys[3]) == "3"
98
+
99
+
100
def test_lru_promotes_on_get():
    """A get() refreshes an entry's recency, changing eviction order."""
    cache = ResultCache(maxsize=3)
    keys = [ResultCache.make_key("t", {"i": i}, 0.0) for i in range(3)]
    for i, key in enumerate(keys):
        cache.put(key, str(i))
    # Touch key 0 so it becomes most-recently-used.
    cache.get(keys[0])
    # Inserting a fourth entry should now evict key 1, the oldest untouched one.
    newest = ResultCache.make_key("t", {"i": 3}, 0.0)
    cache.put(newest, "3")
    assert cache.get(keys[0]) is not None  # promoted entry survives
    assert cache.get(keys[1]) is None      # former LRU was evicted
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # TTL expiry
116
+ # ---------------------------------------------------------------------------
117
+
118
+
119
def test_ttl_expiry(monkeypatch):
    """An entry older than ttl_s is treated as absent on lookup."""
    cache = ResultCache(ttl_s=0.05)
    key = ResultCache.make_key("t", {}, 0.0)
    cache.put(key, "v")
    # Freeze the clock one second past the insertion time — well beyond TTL.
    # The base is captured from the real clock before patching.
    frozen = time.monotonic() + 1.0
    monkeypatch.setattr(time, "monotonic", lambda: frozen)
    assert cache.get(key) is None
135
+
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # Invalidation
139
+ # ---------------------------------------------------------------------------
140
+
141
+
142
def test_invalidate_clears_all():
    """invalidate() with no filter removes every entry and reports the count."""
    cache = ResultCache()
    for i in range(5):
        cache.put(ResultCache.make_key("t", {"i": i}, 0.0), str(i))
    assert cache.invalidate() == 5
    assert cache.stats()["size"] == 0
150
+
151
+
152
def test_invalidate_tool():
    """invalidate_tool() removes only entries keyed by that tool name."""
    cache = ResultCache()
    impact_key = ResultCache.make_key("get_impact", {"s": "A"}, 0.0)
    deadcode_key = ResultCache.make_key("detect_dead_code", {}, 0.0)
    cache.put(impact_key, "a")
    cache.put(deadcode_key, "b")
    assert cache.invalidate_tool("get_impact") == 1
    assert cache.get(impact_key) is None
    assert cache.get(deadcode_key) == "b"
162
+
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # Stats
166
+ # ---------------------------------------------------------------------------
167
+
168
+
169
def test_stats_hit_rate():
    """hit_rate reflects hits / (hits + misses)."""
    cache = ResultCache()
    key = ResultCache.make_key("t", {}, 0.0)
    cache.put(key, "v")
    for _ in range(2):
        cache.get(key)  # two hits
    cache.get(("x",))  # one miss
    stats = cache.stats()
    assert stats["hits"] == 2
    assert stats["misses"] == 1
    assert abs(stats["hit_rate"] - 2 / 3) < 0.01
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes