codespine 0.5.5__tar.gz → 0.5.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {codespine-0.5.5 → codespine-0.5.7}/PKG-INFO +38 -1
  2. {codespine-0.5.5 → codespine-0.5.7}/README.md +37 -0
  3. {codespine-0.5.5 → codespine-0.5.7}/codespine/__init__.py +1 -1
  4. {codespine-0.5.5 → codespine-0.5.7}/codespine/cli.py +38 -4
  5. {codespine-0.5.5 → codespine-0.5.7}/codespine/config.py +1 -0
  6. {codespine-0.5.5 → codespine-0.5.7}/codespine/db/store.py +98 -15
  7. {codespine-0.5.5 → codespine-0.5.7}/codespine/mcp/server.py +46 -0
  8. {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/PKG-INFO +38 -1
  9. {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/SOURCES.txt +2 -1
  10. {codespine-0.5.5 → codespine-0.5.7}/pyproject.toml +1 -1
  11. codespine-0.5.7/tests/test_store_recovery.py +52 -0
  12. {codespine-0.5.5 → codespine-0.5.7}/LICENSE +0 -0
  13. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/__init__.py +0 -0
  14. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/community.py +0 -0
  15. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/context.py +0 -0
  16. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/coupling.py +0 -0
  17. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/crossmodule.py +0 -0
  18. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/deadcode.py +0 -0
  19. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/flow.py +0 -0
  20. {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/impact.py +0 -0
  21. {codespine-0.5.5 → codespine-0.5.7}/codespine/db/__init__.py +0 -0
  22. {codespine-0.5.5 → codespine-0.5.7}/codespine/db/schema.py +0 -0
  23. {codespine-0.5.5 → codespine-0.5.7}/codespine/diff/__init__.py +0 -0
  24. {codespine-0.5.5 → codespine-0.5.7}/codespine/diff/branch_diff.py +0 -0
  25. {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/__init__.py +0 -0
  26. {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/call_resolver.py +0 -0
  27. {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/engine.py +0 -0
  28. {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/java_parser.py +0 -0
  29. {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/symbol_builder.py +0 -0
  30. {codespine-0.5.5 → codespine-0.5.7}/codespine/mcp/__init__.py +0 -0
  31. {codespine-0.5.5 → codespine-0.5.7}/codespine/noise/__init__.py +0 -0
  32. {codespine-0.5.5 → codespine-0.5.7}/codespine/noise/blocklist.py +0 -0
  33. {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/__init__.py +0 -0
  34. {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/git_state.py +0 -0
  35. {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/merge.py +0 -0
  36. {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/store.py +0 -0
  37. {codespine-0.5.5 → codespine-0.5.7}/codespine/search/__init__.py +0 -0
  38. {codespine-0.5.5 → codespine-0.5.7}/codespine/search/bm25.py +0 -0
  39. {codespine-0.5.5 → codespine-0.5.7}/codespine/search/fuzzy.py +0 -0
  40. {codespine-0.5.5 → codespine-0.5.7}/codespine/search/hybrid.py +0 -0
  41. {codespine-0.5.5 → codespine-0.5.7}/codespine/search/rrf.py +0 -0
  42. {codespine-0.5.5 → codespine-0.5.7}/codespine/search/vector.py +0 -0
  43. {codespine-0.5.5 → codespine-0.5.7}/codespine/watch/__init__.py +0 -0
  44. {codespine-0.5.5 → codespine-0.5.7}/codespine/watch/watcher.py +0 -0
  45. {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/dependency_links.txt +0 -0
  46. {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/entry_points.txt +0 -0
  47. {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/requires.txt +0 -0
  48. {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/top_level.txt +0 -0
  49. {codespine-0.5.5 → codespine-0.5.7}/gindex.py +0 -0
  50. {codespine-0.5.5 → codespine-0.5.7}/setup.cfg +0 -0
  51. {codespine-0.5.5 → codespine-0.5.7}/tests/test_branch_diff_normalize.py +0 -0
  52. {codespine-0.5.5 → codespine-0.5.7}/tests/test_call_resolver.py +0 -0
  53. {codespine-0.5.5 → codespine-0.5.7}/tests/test_community_detection.py +0 -0
  54. {codespine-0.5.5 → codespine-0.5.7}/tests/test_deadcode.py +0 -0
  55. {codespine-0.5.5 → codespine-0.5.7}/tests/test_index_and_hybrid.py +0 -0
  56. {codespine-0.5.5 → codespine-0.5.7}/tests/test_java_parser.py +0 -0
  57. {codespine-0.5.5 → codespine-0.5.7}/tests/test_multimodule_index.py +0 -0
  58. {codespine-0.5.5 → codespine-0.5.7}/tests/test_overlay.py +0 -0
  59. {codespine-0.5.5 → codespine-0.5.7}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.5
3
+ Version: 0.5.7
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -70,12 +70,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
70
70
 
71
71
  It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
72
72
 
73
+ It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
74
+
73
75
  ## Why It Saves Tokens
74
76
 
75
77
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
76
78
  - Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
77
79
  - Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
78
80
  - Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
81
+ - Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
79
82
 
80
83
  ## Install
81
84
 
@@ -100,6 +103,32 @@ pip install "codespine[ml]"
100
103
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
101
104
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
102
105
 
106
+ ## Editing Without Stale Indexes
107
+
108
+ CodeSpine uses a two-layer model:
109
+
110
+ - Base index: last committed state
111
+ - Dirty overlay: uncommitted Java changes
112
+
113
+ Fast tools read merged `base + overlay` state by default:
114
+
115
+ - `search`
116
+ - `context`
117
+ - `impact`
118
+ - MCP `search_hybrid`
119
+ - MCP `find_symbol`
120
+ - MCP `get_symbol_context`
121
+ - MCP `get_impact`
122
+
123
+ Deep analyses stay committed-only until promotion:
124
+
125
+ - `deadcode`
126
+ - `flow`
127
+ - `community`
128
+ - `coupling`
129
+
130
+ `codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
131
+
103
132
  ## Quick Start
104
133
 
105
134
  Index a repo:
@@ -205,6 +234,7 @@ codespine analyse <path> --full
205
234
  codespine analyse <path> --deep
206
235
  codespine analyse <path> --embed
207
236
  codespine watch --path .
237
+ codespine watch --path . --overlay-debounce-ms 1500
208
238
  codespine search "query"
209
239
  codespine context "symbol"
210
240
  codespine impact "symbol"
@@ -215,6 +245,9 @@ codespine coupling
215
245
  codespine diff main..feature
216
246
  codespine stats
217
247
  codespine list
248
+ codespine overlay-status
249
+ codespine overlay-promote
250
+ codespine overlay-clear
218
251
  codespine clear-project <project_id>
219
252
  codespine clear-index
220
253
  ```
@@ -247,6 +280,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
247
280
 
248
281
  Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
249
282
 
283
+ When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
284
+
250
285
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
251
286
 
252
287
  ## Runtime Files
@@ -256,10 +291,12 @@ Use it when you want architecture-level context. Skip it when you just need the
256
291
  - `~/.codespine.log` - server log
257
292
  - `~/.codespine_embedding_cache.json` - embedding cache
258
293
  - `~/.codespine_index_meta/` - incremental file metadata cache
294
+ - `~/.codespine_overlay/` - uncommitted dirty overlay state
259
295
 
260
296
  ## Notes
261
297
 
262
298
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
299
+ - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
263
300
  - `codespine clear-index` rebuilds the local index database from scratch.
264
301
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
265
302
 
@@ -6,12 +6,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
6
6
 
7
7
  It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
8
8
 
9
+ It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
10
+
9
11
  ## Why It Saves Tokens
10
12
 
11
13
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
12
14
  - Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
13
15
  - Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
14
16
  - Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
17
+ - Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
15
18
 
16
19
  ## Install
17
20
 
@@ -36,6 +39,32 @@ pip install "codespine[ml]"
36
39
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
37
40
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
38
41
 
42
+ ## Editing Without Stale Indexes
43
+
44
+ CodeSpine uses a two-layer model:
45
+
46
+ - Base index: last committed state
47
+ - Dirty overlay: uncommitted Java changes
48
+
49
+ Fast tools read merged `base + overlay` state by default:
50
+
51
+ - `search`
52
+ - `context`
53
+ - `impact`
54
+ - MCP `search_hybrid`
55
+ - MCP `find_symbol`
56
+ - MCP `get_symbol_context`
57
+ - MCP `get_impact`
58
+
59
+ Deep analyses stay committed-only until promotion:
60
+
61
+ - `deadcode`
62
+ - `flow`
63
+ - `community`
64
+ - `coupling`
65
+
66
+ `codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
67
+
39
68
  ## Quick Start
40
69
 
41
70
  Index a repo:
@@ -141,6 +170,7 @@ codespine analyse <path> --full
141
170
  codespine analyse <path> --deep
142
171
  codespine analyse <path> --embed
143
172
  codespine watch --path .
173
+ codespine watch --path . --overlay-debounce-ms 1500
144
174
  codespine search "query"
145
175
  codespine context "symbol"
146
176
  codespine impact "symbol"
@@ -151,6 +181,9 @@ codespine coupling
151
181
  codespine diff main..feature
152
182
  codespine stats
153
183
  codespine list
184
+ codespine overlay-status
185
+ codespine overlay-promote
186
+ codespine overlay-clear
154
187
  codespine clear-project <project_id>
155
188
  codespine clear-index
156
189
  ```
@@ -183,6 +216,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
183
216
 
184
217
  Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
185
218
 
219
+ When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
220
+
186
221
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
187
222
 
188
223
  ## Runtime Files
@@ -192,10 +227,12 @@ Use it when you want architecture-level context. Skip it when you just need the
192
227
  - `~/.codespine.log` - server log
193
228
  - `~/.codespine_embedding_cache.json` - embedding cache
194
229
  - `~/.codespine_index_meta/` - incremental file metadata cache
230
+ - `~/.codespine_overlay/` - uncommitted dirty overlay state
195
231
 
196
232
  ## Notes
197
233
 
198
234
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
235
+ - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
199
236
  - `codespine clear-index` rebuilds the local index database from scratch.
200
237
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
201
238
 
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.5.5"
4
+ __version__ = "0.5.7"
@@ -319,6 +319,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
319
319
  fg="green",
320
320
  )
321
321
 
322
+ # Publish a read replica so MCP and read-only CLI commands (search, stats…)
323
+ # run against an isolated snapshot rather than competing with the write
324
+ # process's buffer pool. The MCP daemon detects the sentinel file and
325
+ # hot-reloads without restarting.
326
+ snap_label = "Publishing read replica..."
327
+ _live_phase(snap_label, "copying")
328
+ store._recycle_conn()
329
+ snapped = GraphStore.snapshot_to_read_replica()
330
+ _finish_phase(snap_label, "MCP will reload automatically" if snapped else "skipped (source DB not found)")
331
+
322
332
 
323
333
  @main.command()
324
334
  @click.argument("query")
@@ -462,19 +472,43 @@ def stats(as_json: bool) -> None:
462
472
  "MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n", {"pid": pid}
463
473
  )
464
474
  classes = store.query_records(
465
- "MATCH (c:Class), (f:File) WHERE c.file_id = f.id AND f.project_id = $pid RETURN count(c) as n",
475
+ """
476
+ MATCH (f:File) WHERE f.project_id = $pid
477
+ WITH f
478
+ MATCH (c:Class) WHERE c.file_id = f.id
479
+ RETURN count(c) as n
480
+ """,
466
481
  {"pid": pid},
467
482
  )
468
483
  methods = store.query_records(
469
- "MATCH (m:Method), (c:Class), (f:File) WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid RETURN count(m) as n",
484
+ """
485
+ MATCH (f:File) WHERE f.project_id = $pid
486
+ WITH f
487
+ MATCH (c:Class) WHERE c.file_id = f.id
488
+ WITH c
489
+ MATCH (c)-[:HAS_METHOD]->(m:Method)
490
+ RETURN count(m) as n
491
+ """,
470
492
  {"pid": pid},
471
493
  )
472
494
  calls = store.query_records(
473
- "MATCH (ma:Method)-[:CALLS]->(mb:Method), (ca:Class), (fa:File) WHERE ma.class_id = ca.id AND ca.file_id = fa.id AND fa.project_id = $pid RETURN count(*) as n",
495
+ """
496
+ MATCH (f:File) WHERE f.project_id = $pid
497
+ WITH f
498
+ MATCH (c:Class) WHERE c.file_id = f.id
499
+ WITH c
500
+ MATCH (c)-[:HAS_METHOD]->(m:Method)-[:CALLS]->()
501
+ RETURN count(*) as n
502
+ """,
474
503
  {"pid": pid},
475
504
  )
476
505
  emb = store.query_records(
477
- "MATCH (s:Symbol), (f:File) WHERE s.file_id = f.id AND f.project_id = $pid AND s.embedding IS NOT NULL RETURN count(s) as n",
506
+ """
507
+ MATCH (f:File) WHERE f.project_id = $pid
508
+ WITH f
509
+ MATCH (s:Symbol) WHERE s.file_id = f.id AND s.embedding IS NOT NULL
510
+ RETURN count(s) as n
511
+ """,
478
512
  {"pid": pid},
479
513
  )
480
514
  rows.append({
@@ -5,6 +5,7 @@ from dataclasses import dataclass
5
5
  @dataclass(frozen=True)
6
6
  class Settings:
7
7
  db_path: str = os.path.expanduser("~/.codespine_db")
8
+ db_snapshot_path: str = os.path.expanduser("~/.codespine_db_read")
8
9
  pid_file: str = os.path.expanduser("~/.codespine.pid")
9
10
  log_file: str = os.path.expanduser("~/.codespine.log")
10
11
  embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")
@@ -18,7 +18,17 @@ from codespine.db.schema import ensure_schema
18
18
 
19
19
  LOGGER = logging.getLogger(__name__)
20
20
 
21
- _BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
21
+ _WRITE_BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
22
+ _READ_BUFFER_POOL_SIZE = 128 * 1024 * 1024 # 128 MB – point queries only; keep footprint small
23
+ _RECOVERABLE_DB_ERROR_MARKERS = (
24
+ "storage version mismatch",
25
+ "catalog version mismatch",
26
+ "database version is not supported",
27
+ "wal version mismatch",
28
+ "corrupt",
29
+ "corrupted",
30
+ "invalid database",
31
+ )
22
32
 
23
33
 
24
34
  @dataclass
@@ -26,26 +36,70 @@ class GraphStore:
26
36
  read_only: bool = False
27
37
 
28
38
  def __post_init__(self) -> None:
29
- db_path = SETTINGS.db_path
30
39
  self._tls: threading.local = threading.local()
31
40
  from codespine.overlay.store import OverlayStore
32
41
 
33
42
  self.overlay_store = OverlayStore()
43
+
44
+ # Read-only callers (MCP, CLI reads) use the read replica when available.
45
+ # This isolates them from the write process's buffer pool and WAL churn.
46
+ if self.read_only and os.path.exists(SETTINGS.db_snapshot_path):
47
+ db_path = SETTINGS.db_snapshot_path
48
+ else:
49
+ db_path = SETTINGS.db_path
50
+
34
51
  try:
35
- self.db = self._open_db(db_path)
52
+ self.db = self._open_with_recovery(db_path)
36
53
  except Exception as exc:
37
54
  fallback = os.path.join("/tmp", ".codespine_db")
38
55
  LOGGER.warning("Primary DB path failed (%s). Falling back to %s", exc, fallback)
39
- self.db = self._open_db(fallback)
56
+ self.db = self._open_with_recovery(fallback)
40
57
  if not self.read_only:
41
- ensure_schema(self._conn())
58
+ self._ensure_schema_with_recovery()
42
59
 
43
60
  def _open_db(self, path: str) -> kuzu.Database:
61
+ pool = _READ_BUFFER_POOL_SIZE if self.read_only else _WRITE_BUFFER_POOL_SIZE
44
62
  # Newer Kuzu versions accept read_only; fall back for older ones.
45
63
  try:
46
- return kuzu.Database(path, buffer_pool_size=_BUFFER_POOL_SIZE, read_only=self.read_only)
64
+ return kuzu.Database(path, buffer_pool_size=pool, read_only=self.read_only)
47
65
  except TypeError:
48
- return kuzu.Database(path, buffer_pool_size=_BUFFER_POOL_SIZE)
66
+ return kuzu.Database(path, buffer_pool_size=pool)
67
+
68
+ @staticmethod
69
+ def _is_recoverable_db_error(exc: Exception) -> bool:
70
+ message = str(exc).lower()
71
+ return any(marker in message for marker in _RECOVERABLE_DB_ERROR_MARKERS)
72
+
73
+ @staticmethod
74
+ def _remove_db_path(path: str) -> None:
75
+ if os.path.isdir(path):
76
+ shutil.rmtree(path, ignore_errors=True)
77
+ elif os.path.exists(path):
78
+ os.remove(path)
79
+
80
+ def _open_with_recovery(self, path: str) -> kuzu.Database:
81
+ try:
82
+ return self._open_db(path)
83
+ except Exception as exc:
84
+ if not self._is_recoverable_db_error(exc):
85
+ raise
86
+ LOGGER.warning("Removing corrupted or incompatible Kuzu DB at %s: %s", path, exc)
87
+ self._remove_db_path(path)
88
+ self._tls = threading.local()
89
+ return self._open_db(path)
90
+
91
+ def _ensure_schema_with_recovery(self) -> None:
92
+ try:
93
+ ensure_schema(self._conn())
94
+ except Exception as exc:
95
+ path = getattr(self.db, "database_path", SETTINGS.db_path)
96
+ if not self._is_recoverable_db_error(exc):
97
+ raise
98
+ LOGGER.warning("Rebuilding corrupted or incompatible Kuzu DB at %s during schema init: %s", path, exc)
99
+ self._remove_db_path(path)
100
+ self.db = self._open_db(path)
101
+ self._tls = threading.local()
102
+ ensure_schema(self._conn())
49
103
 
50
104
  def _conn(self) -> kuzu.Connection:
51
105
  """Return the per-thread Kuzu connection, creating it lazily."""
@@ -447,16 +501,10 @@ class GraphStore:
447
501
  self._recycle_conn()
448
502
  path = SETTINGS.db_path
449
503
  try:
450
- if os.path.isdir(path):
451
- shutil.rmtree(path, ignore_errors=True)
452
- elif os.path.exists(path):
453
- os.remove(path)
504
+ self._remove_db_path(path)
454
505
  except OSError:
455
506
  fallback = os.path.join("/tmp", ".codespine_db")
456
- if os.path.isdir(fallback):
457
- shutil.rmtree(fallback, ignore_errors=True)
458
- elif os.path.exists(fallback):
459
- os.remove(fallback)
507
+ self._remove_db_path(fallback)
460
508
  self.db = self._open_db(fallback)
461
509
  else:
462
510
  self.db = self._open_db(path)
@@ -514,6 +562,41 @@ class GraphStore:
514
562
  },
515
563
  )
516
564
 
565
+ @staticmethod
566
+ def snapshot_to_read_replica() -> bool:
567
+ """Atomically copy the write DB to the read-replica path.
568
+
569
+ The read replica is used by the MCP daemon and all read-only CLI
570
+ commands so they never contend with the write process's buffer pool.
571
+ Returns True on success, False if the source DB does not exist.
572
+ """
573
+ src = SETTINGS.db_path
574
+ dst = SETTINGS.db_snapshot_path
575
+ if not os.path.exists(src):
576
+ return False
577
+ tmp = dst + ".tmp"
578
+ try:
579
+ if os.path.exists(tmp):
580
+ shutil.rmtree(tmp, ignore_errors=True)
581
+ if os.path.isdir(src):
582
+ shutil.copytree(src, tmp)
583
+ else:
584
+ os.makedirs(os.path.dirname(dst) or ".", exist_ok=True)
585
+ shutil.copy2(src, tmp)
586
+ if os.path.exists(dst):
587
+ shutil.rmtree(dst, ignore_errors=True)
588
+ os.rename(tmp, dst)
589
+ # Sentinel: MCP daemon watches this file's mtime to know when to reload.
590
+ sentinel = dst + ".updated"
591
+ with open(sentinel, "w", encoding="utf-8") as f:
592
+ f.write(str(int(time.time())))
593
+ return True
594
+ except Exception as exc:
595
+ LOGGER.warning("Snapshot to read replica failed: %s", exc)
596
+ if os.path.exists(tmp):
597
+ shutil.rmtree(tmp, ignore_errors=True)
598
+ return False
599
+
517
600
  def query_records(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
518
601
  frame = self.execute(query, params or {}).get_as_df()
519
602
  if frame.empty:
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json as _json_mod
4
+ import logging
5
+ import os
4
6
  import subprocess
5
7
  import sys
6
8
  import tempfile
@@ -8,6 +10,10 @@ import time
8
10
 
9
11
  from fastmcp import FastMCP
10
12
 
13
+ from codespine.config import SETTINGS
14
+
15
+ _LOGGER = logging.getLogger(__name__)
16
+
11
17
  from codespine import __version__
12
18
  from codespine.analysis.community import detect_communities, symbol_community
13
19
  from codespine.analysis.context import build_symbol_context
@@ -129,7 +135,47 @@ def _staleness_meta(
129
135
  return _json(response)
130
136
 
131
137
 
138
+ class _StoreProxy:
139
+ """Wraps a GraphStore and hot-reloads from the read replica when the
140
+ post-analyse sentinel file is touched.
141
+
142
+ After `codespine analyse` finishes it copies the write DB to
143
+ ``~/.codespine_db_read`` and writes ``~/.codespine_db_read.updated``.
144
+ This proxy checks that sentinel's mtime before every attribute access and
145
+ silently swaps in a fresh read-only GraphStore so the MCP daemon picks up
146
+ the new index without restarting.
147
+ """
148
+
149
+ def __init__(self, store) -> None:
150
+ object.__setattr__(self, "_store", store)
151
+ object.__setattr__(self, "_sentinel", SETTINGS.db_snapshot_path + ".updated")
152
+ object.__setattr__(self, "_last_mtime", self._sentinel_mtime())
153
+
154
+ def _sentinel_mtime(self) -> float:
155
+ try:
156
+ return os.path.getmtime(object.__getattribute__(self, "_sentinel"))
157
+ except FileNotFoundError:
158
+ return 0.0
159
+
160
+ def _maybe_reload(self) -> None:
161
+ current = self._sentinel_mtime()
162
+ if current > object.__getattribute__(self, "_last_mtime"):
163
+ from codespine.db.store import GraphStore as _GS
164
+ try:
165
+ new_store = _GS(read_only=True)
166
+ object.__setattr__(self, "_store", new_store)
167
+ object.__setattr__(self, "_last_mtime", current)
168
+ _LOGGER.info("MCP: hot-reloaded GraphStore from updated snapshot")
169
+ except Exception as exc:
170
+ _LOGGER.warning("MCP: hot-reload failed: %s", exc)
171
+
172
+ def __getattr__(self, name: str):
173
+ self._maybe_reload()
174
+ return getattr(object.__getattribute__(self, "_store"), name)
175
+
176
+
132
177
  def build_mcp_server(store, repo_path_provider):
178
+ store = _StoreProxy(store)
133
179
  _raw_mcp = FastMCP("codespine")
134
180
  overlay_store = getattr(store, "overlay_store", None)
135
181
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.5
3
+ Version: 0.5.7
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -70,12 +70,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
70
70
 
71
71
  It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
72
72
 
73
+ It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
74
+
73
75
  ## Why It Saves Tokens
74
76
 
75
77
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
76
78
  - Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
77
79
  - Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
78
80
  - Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
81
+ - Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
79
82
 
80
83
  ## Install
81
84
 
@@ -100,6 +103,32 @@ pip install "codespine[ml]"
100
103
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
101
104
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
102
105
 
106
+ ## Editing Without Stale Indexes
107
+
108
+ CodeSpine uses a two-layer model:
109
+
110
+ - Base index: last committed state
111
+ - Dirty overlay: uncommitted Java changes
112
+
113
+ Fast tools read merged `base + overlay` state by default:
114
+
115
+ - `search`
116
+ - `context`
117
+ - `impact`
118
+ - MCP `search_hybrid`
119
+ - MCP `find_symbol`
120
+ - MCP `get_symbol_context`
121
+ - MCP `get_impact`
122
+
123
+ Deep analyses stay committed-only until promotion:
124
+
125
+ - `deadcode`
126
+ - `flow`
127
+ - `community`
128
+ - `coupling`
129
+
130
+ `codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
131
+
103
132
  ## Quick Start
104
133
 
105
134
  Index a repo:
@@ -205,6 +234,7 @@ codespine analyse <path> --full
205
234
  codespine analyse <path> --deep
206
235
  codespine analyse <path> --embed
207
236
  codespine watch --path .
237
+ codespine watch --path . --overlay-debounce-ms 1500
208
238
  codespine search "query"
209
239
  codespine context "symbol"
210
240
  codespine impact "symbol"
@@ -215,6 +245,9 @@ codespine coupling
215
245
  codespine diff main..feature
216
246
  codespine stats
217
247
  codespine list
248
+ codespine overlay-status
249
+ codespine overlay-promote
250
+ codespine overlay-clear
218
251
  codespine clear-project <project_id>
219
252
  codespine clear-index
220
253
  ```
@@ -247,6 +280,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
247
280
 
248
281
  Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
249
282
 
283
+ When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
284
+
250
285
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
251
286
 
252
287
  ## Runtime Files
@@ -256,10 +291,12 @@ Use it when you want architecture-level context. Skip it when you just need the
256
291
  - `~/.codespine.log` - server log
257
292
  - `~/.codespine_embedding_cache.json` - embedding cache
258
293
  - `~/.codespine_index_meta/` - incremental file metadata cache
294
+ - `~/.codespine_overlay/` - uncommitted dirty overlay state
259
295
 
260
296
  ## Notes
261
297
 
262
298
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
299
+ - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
263
300
  - `codespine clear-index` rebuilds the local index database from scratch.
264
301
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
265
302
 
@@ -53,4 +53,5 @@ tests/test_index_and_hybrid.py
53
53
  tests/test_java_parser.py
54
54
  tests/test_multimodule_index.py
55
55
  tests/test_overlay.py
56
- tests/test_search_ranking.py
56
+ tests/test_search_ranking.py
57
+ tests/test_store_recovery.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.5.5"
7
+ version = "0.5.7"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+
5
+ pytest.importorskip("kuzu")
6
+
7
+ from codespine.db.store import GraphStore
8
+
9
+
10
+ def test_open_with_recovery_rebuilds_legacy_db(monkeypatch):
11
+ opened: list[str] = []
12
+ removed: list[str] = []
13
+ calls = {"count": 0}
14
+
15
+ def fake_open(self, path: str):
16
+ calls["count"] += 1
17
+ opened.append(path)
18
+ if calls["count"] == 1:
19
+ raise RuntimeError("Storage version mismatch: unsupported database")
20
+ return object()
21
+
22
+ monkeypatch.setattr(GraphStore, "_open_db", fake_open)
23
+ monkeypatch.setattr(GraphStore, "_remove_db_path", staticmethod(lambda path: removed.append(path)))
24
+
25
+ store = GraphStore.__new__(GraphStore)
26
+ store.read_only = False
27
+ store._tls = None
28
+
29
+ db = GraphStore._open_with_recovery(store, "/tmp/test-codespine-db")
30
+
31
+ assert db is not None
32
+ assert opened == ["/tmp/test-codespine-db", "/tmp/test-codespine-db"]
33
+ assert removed == ["/tmp/test-codespine-db"]
34
+
35
+
36
+ def test_open_with_recovery_does_not_remove_on_permission_error(monkeypatch):
37
+ removed: list[str] = []
38
+
39
+ def fake_open(self, path: str):
40
+ raise RuntimeError("Operation not permitted")
41
+
42
+ monkeypatch.setattr(GraphStore, "_open_db", fake_open)
43
+ monkeypatch.setattr(GraphStore, "_remove_db_path", staticmethod(lambda path: removed.append(path)))
44
+
45
+ store = GraphStore.__new__(GraphStore)
46
+ store.read_only = False
47
+ store._tls = None
48
+
49
+ with pytest.raises(RuntimeError, match="Operation not permitted"):
50
+ GraphStore._open_with_recovery(store, "/tmp/test-codespine-db")
51
+
52
+ assert removed == []
File without changes
File without changes
File without changes