codespine 0.9.7__tar.gz → 0.9.9__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.
Files changed (71)
  1. {codespine-0.9.7 → codespine-0.9.9}/PKG-INFO +4 -1
  2. {codespine-0.9.7 → codespine-0.9.9}/codespine/__init__.py +1 -1
  3. codespine-0.9.9/codespine/cache/__init__.py +4 -0
  4. codespine-0.9.9/codespine/cache/result_cache.py +167 -0
  5. {codespine-0.9.7 → codespine-0.9.9}/codespine/cli.py +39 -3
  6. {codespine-0.9.7 → codespine-0.9.9}/codespine/config.py +5 -1
  7. codespine-0.9.9/codespine/db/duckdb_store.py +882 -0
  8. {codespine-0.9.7 → codespine-0.9.9}/codespine/db/store.py +1 -0
  9. {codespine-0.9.7 → codespine-0.9.9}/codespine/mcp/server.py +230 -5
  10. {codespine-0.9.7 → codespine-0.9.9}/codespine/search/vector.py +21 -4
  11. {codespine-0.9.7 → codespine-0.9.9}/codespine/sharding/store.py +37 -17
  12. {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/PKG-INFO +4 -1
  13. {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/SOURCES.txt +5 -0
  14. {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/requires.txt +4 -0
  15. {codespine-0.9.7 → codespine-0.9.9}/pyproject.toml +6 -2
  16. codespine-0.9.9/tests/test_duckdb_store.py +401 -0
  17. codespine-0.9.9/tests/test_result_cache.py +179 -0
  18. {codespine-0.9.7 → codespine-0.9.9}/LICENSE +0 -0
  19. {codespine-0.9.7 → codespine-0.9.9}/README.md +0 -0
  20. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/__init__.py +0 -0
  21. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/community.py +0 -0
  22. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/context.py +0 -0
  23. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/coupling.py +0 -0
  24. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/crossmodule.py +0 -0
  25. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/deadcode.py +0 -0
  26. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/flow.py +0 -0
  27. {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/impact.py +0 -0
  28. {codespine-0.9.7 → codespine-0.9.9}/codespine/db/__init__.py +0 -0
  29. {codespine-0.9.7 → codespine-0.9.9}/codespine/db/schema.py +0 -0
  30. {codespine-0.9.7 → codespine-0.9.9}/codespine/diff/__init__.py +0 -0
  31. {codespine-0.9.7 → codespine-0.9.9}/codespine/diff/branch_diff.py +0 -0
  32. {codespine-0.9.7 → codespine-0.9.9}/codespine/guide.py +0 -0
  33. {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/__init__.py +0 -0
  34. {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/call_resolver.py +0 -0
  35. {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/di_resolver.py +0 -0
  36. {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/engine.py +0 -0
  37. {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/java_parser.py +0 -0
  38. {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/symbol_builder.py +0 -0
  39. {codespine-0.9.7 → codespine-0.9.9}/codespine/mcp/__init__.py +0 -0
  40. {codespine-0.9.7 → codespine-0.9.9}/codespine/noise/__init__.py +0 -0
  41. {codespine-0.9.7 → codespine-0.9.9}/codespine/noise/blocklist.py +0 -0
  42. {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/__init__.py +0 -0
  43. {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/git_state.py +0 -0
  44. {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/merge.py +0 -0
  45. {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/store.py +0 -0
  46. {codespine-0.9.7 → codespine-0.9.9}/codespine/search/__init__.py +0 -0
  47. {codespine-0.9.7 → codespine-0.9.9}/codespine/search/bm25.py +0 -0
  48. {codespine-0.9.7 → codespine-0.9.9}/codespine/search/fuzzy.py +0 -0
  49. {codespine-0.9.7 → codespine-0.9.9}/codespine/search/hybrid.py +0 -0
  50. {codespine-0.9.7 → codespine-0.9.9}/codespine/search/rrf.py +0 -0
  51. {codespine-0.9.7 → codespine-0.9.9}/codespine/sharding/__init__.py +0 -0
  52. {codespine-0.9.7 → codespine-0.9.9}/codespine/sharding/router.py +0 -0
  53. {codespine-0.9.7 → codespine-0.9.9}/codespine/watch/__init__.py +0 -0
  54. {codespine-0.9.7 → codespine-0.9.9}/codespine/watch/git_hook.py +0 -0
  55. {codespine-0.9.7 → codespine-0.9.9}/codespine/watch/watcher.py +0 -0
  56. {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/dependency_links.txt +0 -0
  57. {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/entry_points.txt +0 -0
  58. {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/top_level.txt +0 -0
  59. {codespine-0.9.7 → codespine-0.9.9}/gindex.py +0 -0
  60. {codespine-0.9.7 → codespine-0.9.9}/setup.cfg +0 -0
  61. {codespine-0.9.7 → codespine-0.9.9}/tests/test_branch_diff_normalize.py +0 -0
  62. {codespine-0.9.7 → codespine-0.9.9}/tests/test_call_resolver.py +0 -0
  63. {codespine-0.9.7 → codespine-0.9.9}/tests/test_community_detection.py +0 -0
  64. {codespine-0.9.7 → codespine-0.9.9}/tests/test_deadcode.py +0 -0
  65. {codespine-0.9.7 → codespine-0.9.9}/tests/test_index_and_hybrid.py +0 -0
  66. {codespine-0.9.7 → codespine-0.9.9}/tests/test_java_parser.py +0 -0
  67. {codespine-0.9.7 → codespine-0.9.9}/tests/test_multimodule_index.py +0 -0
  68. {codespine-0.9.7 → codespine-0.9.9}/tests/test_overlay.py +0 -0
  69. {codespine-0.9.7 → codespine-0.9.9}/tests/test_search_ranking.py +0 -0
  70. {codespine-0.9.7 → codespine-0.9.9}/tests/test_sharding.py +0 -0
  71. {codespine-0.9.7 → codespine-0.9.9}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.7
3
+ Version: 0.9.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -55,11 +55,14 @@ Requires-Dist: numpy; extra == "ml"
55
55
  Provides-Extra: community
56
56
  Requires-Dist: igraph; extra == "community"
57
57
  Requires-Dist: leidenalg; extra == "community"
58
+ Provides-Extra: duckdb
59
+ Requires-Dist: duckdb>=0.10.0; extra == "duckdb"
58
60
  Provides-Extra: full
59
61
  Requires-Dist: sentence-transformers; extra == "full"
60
62
  Requires-Dist: numpy; extra == "full"
61
63
  Requires-Dist: igraph; extra == "full"
62
64
  Requires-Dist: leidenalg; extra == "full"
65
+ Requires-Dist: duckdb>=0.10.0; extra == "full"
63
66
  Dynamic: license-file
64
67
 
65
68
  # CodeSpine
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.9.7"
4
+ __version__ = "0.9.9"
@@ -0,0 +1,4 @@
1
+ """Result caching for MCP tools."""
2
+ from codespine.cache.result_cache import ResultCache
3
+
4
+ __all__ = ["ResultCache"]
@@ -0,0 +1,167 @@
1
+ """LRU result cache for CodeSpine MCP tools.
2
+
3
+ Avoids recomputing expensive analyses (impact BFS, dead-code scan, community
4
+ lookup) when the same arguments are passed and the underlying index hasn't
5
+ changed since the last call.
6
+
7
+ Cache key: ``(tool_name, args_hash, snapshot_mtime_rounded)``
8
+ - ``tool_name`` — the MCP tool that produced the result
9
+ - ``args_hash`` — first 16 hex characters of the SHA-1 of the JSON-serialised arguments (sorted keys)
10
+ - ``snapshot_mtime_rounded`` — read-replica mtime rounded to 1 s, so a new
11
+ snapshot invalidates all cached results for the affected store
12
+
13
+ TTL: entries are evicted after ``ttl_s`` seconds (default 300 s / 5 min) even
14
+ if the cache isn't full, preventing stale results across long sessions.
15
+
16
+ Usage
17
+ -----
18
+ from codespine.cache.result_cache import ResultCache
19
+
20
+ _cache = ResultCache(maxsize=256, ttl_s=300.0)
21
+
22
+ key = _cache.make_key("get_impact", {"symbol": "Foo", "project": "myapp"}, mtime)
23
+ cached = _cache.get(key)
24
+ if cached is not None:
25
+ return cached
26
+ result = expensive_computation(...)
27
+ _cache.put(key, result)
28
+ return result
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import hashlib
34
+ import json
35
+ import threading
36
+ import time
37
+ from collections import OrderedDict
38
+ from typing import Any
39
+
40
+
41
class ResultCache:
    """Thread-safe LRU cache for pre-serialised JSON tool results.

    Every public method that touches the cache or its counters does so while
    holding an internal ``threading.Lock``.

    Parameters
    ----------
    maxsize:
        Maximum number of entries to keep.  When an insert pushes the cache
        past this size, the least-recently-used entries are evicted.  A value
        of zero or less disables storage: ``put`` keeps nothing and every
        ``get`` is a miss.
    ttl_s:
        Time-to-live in seconds.  Entries older than this are treated as
        missing even if they're still in the cache.  Expiry is lazy: an
        expired entry is only removed when it is next looked up (or when it
        is evicted by the LRU policy).
    """

    def __init__(self, maxsize: int = 256, ttl_s: float = 300.0) -> None:
        self._maxsize = maxsize
        self._ttl = ttl_s
        # Ordered from least- to most-recently used.  Each value is a
        # ``(payload, inserted_at)`` pair, where ``inserted_at`` is a
        # ``time.monotonic()`` reading taken when the entry was stored.
        self._cache: OrderedDict[tuple, tuple[str, float]] = OrderedDict()
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    # ------------------------------------------------------------------
    # Key construction
    # ------------------------------------------------------------------

    @staticmethod
    def make_key(
        tool_name: str,
        args: dict[str, Any],
        snapshot_mtime: float,
    ) -> tuple:
        """Build a cache key from tool name, arguments, and index timestamp.

        Parameters
        ----------
        tool_name:
            Name of the MCP tool (e.g. ``"get_impact"``).
        args:
            Tool arguments dict (``None`` values included so missing optional
            args don't collide with explicitly-set ones).
        snapshot_mtime:
            Last-modified time of the read-replica sentinel file.  It is
            rounded to 1-second precision here, so callers may pass the raw
            value.  A new snapshot therefore yields a different key.

        Returns
        -------
        tuple
            ``(tool_name, args_hash, rounded_mtime)`` where ``args_hash`` is
            the first 16 hex characters of the SHA-1 of the serialised
            arguments.
        """
        try:
            # sort_keys makes the hash independent of dict insertion order;
            # default=str stringifies values json cannot encode natively.
            args_bytes = json.dumps(args, sort_keys=True, default=str).encode()
        except Exception:
            # Best-effort fallback for arguments json.dumps still rejects
            # (e.g. unsortable or unsupported key types).
            args_bytes = str(args).encode()
        args_hash = hashlib.sha1(args_bytes).hexdigest()[:16]
        return (tool_name, args_hash, round(snapshot_mtime, 0))

    # ------------------------------------------------------------------
    # Cache operations
    # ------------------------------------------------------------------

    def get(self, key: tuple) -> str | None:
        """Return the cached value for *key*, or ``None`` if missing/expired.

        A hit promotes the entry to most-recently-used but does not refresh
        its age; an expired entry is removed and counted as a miss.
        """
        with self._lock:
            if key not in self._cache:
                self._misses += 1
                return None
            value, inserted_at = self._cache[key]
            if time.monotonic() - inserted_at > self._ttl:
                del self._cache[key]
                self._misses += 1
                return None
            # Promote to most-recently-used position.
            self._cache.move_to_end(key)
            self._hits += 1
            return value

    def put(self, key: tuple, value: str) -> None:
        """Store *value* under *key*, evicting LRU entries if over capacity.

        Re-storing an existing key replaces its value, resets its age and
        makes it the most-recently-used entry.
        """
        with self._lock:
            if key in self._cache:
                # Plain assignment keeps the old position, so move it first.
                self._cache.move_to_end(key)
            self._cache[key] = (value, time.monotonic())
            # Evict oldest entries until we're within maxsize.  The emptiness
            # check matters for a negative maxsize: without it the loop would
            # call popitem() on an empty OrderedDict and raise KeyError.
            while self._cache and len(self._cache) > self._maxsize:
                self._cache.popitem(last=False)

    def invalidate(self) -> int:
        """Clear the entire cache.  Call after any index mutation.

        Returns the number of entries evicted.  Hit/miss counters are kept.
        """
        with self._lock:
            n = len(self._cache)
            self._cache.clear()
            return n

    def invalidate_tool(self, tool_name: str) -> int:
        """Evict all entries for a specific tool.

        Entries are matched on the first element of their key, which is the
        tool name for keys built by ``make_key``.

        Returns the number of entries removed.
        """
        with self._lock:
            # Collect first: the dict must not change size while iterating.
            keys_to_remove = [k for k in self._cache if k[0] == tool_name]
            for k in keys_to_remove:
                del self._cache[k]
            return len(keys_to_remove)

    # ------------------------------------------------------------------
    # Stats / introspection
    # ------------------------------------------------------------------

    def stats(self) -> dict[str, Any]:
        """Return cache statistics (size, hit/miss counts, hit rate).

        ``hit_rate`` is ``hits / (hits + misses)`` rounded to three decimals,
        or ``0.0`` when no lookups have happened yet.
        """
        with self._lock:
            total = self._hits + self._misses
            return {
                "size": len(self._cache),
                "maxsize": self._maxsize,
                "ttl_s": self._ttl,
                "hits": self._hits,
                "misses": self._misses,
                "hit_rate": round(self._hits / total, 3) if total else 0.0,
            }

    def __repr__(self) -> str:  # pragma: no cover
        s = self.stats()
        return (
            f"ResultCache(size={s['size']}/{s['maxsize']}, "
            f"hits={s['hits']}, misses={s['misses']}, "
            f"hit_rate={s['hit_rate']:.1%})"
        )
@@ -293,7 +293,13 @@ def main() -> None:
293
293
  @main.command()
294
294
  @click.argument("path", type=click.Path(exists=True))
295
295
  @click.option("--full/--incremental", default=False, show_default=True)
296
- @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
296
+ @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses (auto-on for repos ≤3 k files).")
297
+ @click.option(
298
+ "--incremental-deep",
299
+ is_flag=True,
300
+ default=False,
301
+ help="Force deep analysis even during incremental re-index. Useful after large refactors.",
302
+ )
297
303
  @click.option(
298
304
  "--embed/--no-embed",
299
305
  default=True,
@@ -301,7 +307,7 @@ def main() -> None:
301
307
  help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
302
308
  )
303
309
  @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
304
- def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool) -> None:
310
+ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bool, allow_running: bool) -> None:
305
311
  """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
306
312
 
307
313
  Embeddings are generated by default. If sentence-transformers is installed
@@ -459,7 +465,7 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
459
465
  dead: list[dict] = []
460
466
  coupling_pairs: list[dict] = []
461
467
 
462
- should_run_deep = deep or total_files_found <= 1200
468
+ should_run_deep = deep or incremental_deep or total_files_found <= 3000
463
469
  if should_run_deep:
464
470
  comm_label = "Detecting communities..."
465
471
  _live_phase(comm_label, "running")
@@ -1138,6 +1144,36 @@ def stop() -> None:
1138
1144
  os.remove(SETTINGS.pid_file)
1139
1145
 
1140
1146
 
1147
@main.command("install-model")
def install_model() -> None:
    """Download and cache the sentence-transformers embedding model.

    Requires 'pip install codespine[ml]'. The model is downloaded once and
    cached locally; subsequent analyse runs use the cache without network access.

    Exits with status 1 if sentence-transformers is not installed or the
    model cannot be loaded, so scripts can detect the failure.
    """
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        click.secho(
            "sentence-transformers is not installed.\n"
            "Run: pip install codespine[ml]",
            fg="red",
        )
        # Report the failure through the exit status, not just the message.
        raise SystemExit(1) from None

    model_name = SETTINGS.embedding_model
    click.secho(f"Downloading model '{model_name}' …", fg="cyan")
    try:
        model = SentenceTransformer(model_name)
        # Run a tiny inference to confirm the model is usable.
        model.encode(["hello world"])
    except Exception as exc:
        # Any load or inference error is reported to the user; the original
        # exception text is included in the message.
        click.secho(f"✗ Failed to load model: {exc}", fg="red")
        raise SystemExit(1) from None
    else:
        click.secho(f"✓ Model '{model_name}' ready. Semantic search is now enabled.", fg="green")
1175
+
1176
+
1141
1177
  @main.command("run-mcp", hidden=True)
1142
1178
  def run_mcp() -> None:
1143
1179
  """Run MCP server in stdio mode."""
@@ -1,5 +1,5 @@
1
1
  import os
2
- from dataclasses import dataclass
2
+ from dataclasses import dataclass, field
3
3
 
4
4
 
5
5
  @dataclass(frozen=True)
@@ -15,6 +15,10 @@ class Settings:
15
15
  num_shards: int = 4
16
16
  shards_dir: str = os.path.expanduser("~/.codespine/shards")
17
17
 
18
+ # Storage backend: "kuzu" (default, property-graph) or "duckdb" (relational).
19
+ # Override at runtime via CODESPINE_BACKEND env var before starting the process.
20
+ backend: str = field(default_factory=lambda: os.environ.get("CODESPINE_BACKEND", "kuzu"))
21
+
18
22
  pid_file: str = os.path.expanduser("~/.codespine.pid")
19
23
  log_file: str = os.path.expanduser("~/.codespine.log")
20
24
  embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")