codespine 0.5.6__tar.gz → 0.5.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {codespine-0.5.6 → codespine-0.5.8}/PKG-INFO +29 -3
  2. {codespine-0.5.6 → codespine-0.5.8}/README.md +28 -2
  3. {codespine-0.5.6 → codespine-0.5.8}/codespine/__init__.py +1 -1
  4. {codespine-0.5.6 → codespine-0.5.8}/codespine/cli.py +38 -4
  5. {codespine-0.5.6 → codespine-0.5.8}/codespine/config.py +1 -0
  6. {codespine-0.5.6 → codespine-0.5.8}/codespine/db/store.py +48 -4
  7. {codespine-0.5.6 → codespine-0.5.8}/codespine/mcp/server.py +46 -0
  8. {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/PKG-INFO +29 -3
  9. {codespine-0.5.6 → codespine-0.5.8}/pyproject.toml +1 -1
  10. {codespine-0.5.6 → codespine-0.5.8}/LICENSE +0 -0
  11. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/__init__.py +0 -0
  12. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/community.py +0 -0
  13. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/context.py +0 -0
  14. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/coupling.py +0 -0
  15. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/crossmodule.py +0 -0
  16. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/deadcode.py +0 -0
  17. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/flow.py +0 -0
  18. {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/impact.py +0 -0
  19. {codespine-0.5.6 → codespine-0.5.8}/codespine/db/__init__.py +0 -0
  20. {codespine-0.5.6 → codespine-0.5.8}/codespine/db/schema.py +0 -0
  21. {codespine-0.5.6 → codespine-0.5.8}/codespine/diff/__init__.py +0 -0
  22. {codespine-0.5.6 → codespine-0.5.8}/codespine/diff/branch_diff.py +0 -0
  23. {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/__init__.py +0 -0
  24. {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/call_resolver.py +0 -0
  25. {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/engine.py +0 -0
  26. {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/java_parser.py +0 -0
  27. {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/symbol_builder.py +0 -0
  28. {codespine-0.5.6 → codespine-0.5.8}/codespine/mcp/__init__.py +0 -0
  29. {codespine-0.5.6 → codespine-0.5.8}/codespine/noise/__init__.py +0 -0
  30. {codespine-0.5.6 → codespine-0.5.8}/codespine/noise/blocklist.py +0 -0
  31. {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/__init__.py +0 -0
  32. {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/git_state.py +0 -0
  33. {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/merge.py +0 -0
  34. {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/store.py +0 -0
  35. {codespine-0.5.6 → codespine-0.5.8}/codespine/search/__init__.py +0 -0
  36. {codespine-0.5.6 → codespine-0.5.8}/codespine/search/bm25.py +0 -0
  37. {codespine-0.5.6 → codespine-0.5.8}/codespine/search/fuzzy.py +0 -0
  38. {codespine-0.5.6 → codespine-0.5.8}/codespine/search/hybrid.py +0 -0
  39. {codespine-0.5.6 → codespine-0.5.8}/codespine/search/rrf.py +0 -0
  40. {codespine-0.5.6 → codespine-0.5.8}/codespine/search/vector.py +0 -0
  41. {codespine-0.5.6 → codespine-0.5.8}/codespine/watch/__init__.py +0 -0
  42. {codespine-0.5.6 → codespine-0.5.8}/codespine/watch/watcher.py +0 -0
  43. {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/SOURCES.txt +0 -0
  44. {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/dependency_links.txt +0 -0
  45. {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/entry_points.txt +0 -0
  46. {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/requires.txt +0 -0
  47. {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/top_level.txt +0 -0
  48. {codespine-0.5.6 → codespine-0.5.8}/gindex.py +0 -0
  49. {codespine-0.5.6 → codespine-0.5.8}/setup.cfg +0 -0
  50. {codespine-0.5.6 → codespine-0.5.8}/tests/test_branch_diff_normalize.py +0 -0
  51. {codespine-0.5.6 → codespine-0.5.8}/tests/test_call_resolver.py +0 -0
  52. {codespine-0.5.6 → codespine-0.5.8}/tests/test_community_detection.py +0 -0
  53. {codespine-0.5.6 → codespine-0.5.8}/tests/test_deadcode.py +0 -0
  54. {codespine-0.5.6 → codespine-0.5.8}/tests/test_index_and_hybrid.py +0 -0
  55. {codespine-0.5.6 → codespine-0.5.8}/tests/test_java_parser.py +0 -0
  56. {codespine-0.5.6 → codespine-0.5.8}/tests/test_multimodule_index.py +0 -0
  57. {codespine-0.5.6 → codespine-0.5.8}/tests/test_overlay.py +0 -0
  58. {codespine-0.5.6 → codespine-0.5.8}/tests/test_search_ranking.py +0 -0
  59. {codespine-0.5.6 → codespine-0.5.8}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.6
3
+ Version: 0.5.8
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -72,6 +72,8 @@ It indexes classes, methods, calls, type relationships, cross-module links, git
72
72
 
73
73
  It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
74
74
 
75
+ The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
76
+
75
77
  ## Why It Saves Tokens
76
78
 
77
79
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
@@ -101,6 +103,8 @@ pip install "codespine[ml]"
101
103
  - Community detection: structural clusters for architectural context
102
104
  - Change coupling: git-history-based file relationships
103
105
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
106
+ - Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
107
+ - Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
104
108
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
105
109
 
106
110
  ## Editing Without Stale Indexes
@@ -159,15 +163,23 @@ Parsing code... 8/8
159
163
  Tracing calls... 847 calls resolved
160
164
  Analyzing types... 234 type relationships
161
165
  Cross-module linking... skipped (single module)
166
+ Detecting communities... loading symbols
167
+ Detecting communities... 623 symbols, 1204 structural edges
168
+ Detecting communities... persisting 8/8 clusters
162
169
  Detecting communities... 8 clusters found
170
+ Detecting execution flows... 34 entry points, tracing
163
171
  Detecting execution flows... 34 processes found
164
172
  Finding dead code... 12 unreachable symbols
173
+ Analyzing git history... 18 commits, computing co-changes
165
174
  Analyzing git history... 18 coupled file pairs
166
175
  Generating embeddings... 0 vectors stored
167
176
 
168
177
  Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
178
+ Publishing read replica... MCP will reload automatically
169
179
  ```
170
180
 
181
+ Each analysis phase streams live progress in place. The final step publishes a read replica so the MCP daemon picks up the new index without restarting.
182
+
171
183
  Search the index:
172
184
 
173
185
  ```bash
@@ -284,9 +296,22 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
284
296
 
285
297
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
286
298
 
299
+ ## Concurrent Indexing and Querying
300
+
301
+ The indexer (write) and the MCP daemon (read) use separate database paths:
302
+
303
+ - The indexer writes to `~/.codespine_db` with a 512 MB buffer pool.
304
+ - When indexing completes, `analyse` atomically copies the database to `~/.codespine_db_read` and touches a sentinel file.
305
+ - The MCP daemon and all read-only CLI commands open `~/.codespine_db_read` with a 128 MB buffer pool.
306
+ - The MCP daemon watches the sentinel file and silently reloads from the new snapshot on the next tool call — no restart needed.
307
+
308
+ Running `codespine analyse --deep --embed` on one project while querying a different one no longer causes buffer pool OOM or lock contention.
309
+
287
310
  ## Runtime Files
288
311
 
289
- - `~/.codespine_db` - graph database
312
+ - `~/.codespine_db` - graph database (write)
313
+ - `~/.codespine_db_read` - read replica used by MCP and CLI queries
314
+ - `~/.codespine_db_read.updated` - sentinel file; touched after each successful snapshot
290
315
  - `~/.codespine.pid` - MCP background server PID
291
316
  - `~/.codespine.log` - server log
292
317
  - `~/.codespine_embedding_cache.json` - embedding cache
@@ -297,8 +322,9 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
297
322
 
298
323
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
299
324
  - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
300
- - `codespine clear-index` rebuilds the local index database from scratch.
325
+ - `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
301
326
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
327
+ - The first run after upgrading to v0.5.7 will not have a read replica yet. Run `codespine analyse` once to create it.
302
328
 
303
329
  ## Project Docs
304
330
 
@@ -8,6 +8,8 @@ It indexes classes, methods, calls, type relationships, cross-module links, git
8
8
 
9
9
  It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
10
10
 
11
+ The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
12
+
11
13
  ## Why It Saves Tokens
12
14
 
13
15
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
@@ -37,6 +39,8 @@ pip install "codespine[ml]"
37
39
  - Community detection: structural clusters for architectural context
38
40
  - Change coupling: git-history-based file relationships
39
41
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
42
+ - Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
43
+ - Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
40
44
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
41
45
 
42
46
  ## Editing Without Stale Indexes
@@ -95,15 +99,23 @@ Parsing code... 8/8
95
99
  Tracing calls... 847 calls resolved
96
100
  Analyzing types... 234 type relationships
97
101
  Cross-module linking... skipped (single module)
102
+ Detecting communities... loading symbols
103
+ Detecting communities... 623 symbols, 1204 structural edges
104
+ Detecting communities... persisting 8/8 clusters
98
105
  Detecting communities... 8 clusters found
106
+ Detecting execution flows... 34 entry points, tracing
99
107
  Detecting execution flows... 34 processes found
100
108
  Finding dead code... 12 unreachable symbols
109
+ Analyzing git history... 18 commits, computing co-changes
101
110
  Analyzing git history... 18 coupled file pairs
102
111
  Generating embeddings... 0 vectors stored
103
112
 
104
113
  Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
114
+ Publishing read replica... MCP will reload automatically
105
115
  ```
106
116
 
117
+ Each analysis phase streams live progress in place. The final step publishes a read replica so the MCP daemon picks up the new index without restarting.
118
+
107
119
  Search the index:
108
120
 
109
121
  ```bash
@@ -220,9 +232,22 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
220
232
 
221
233
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
222
234
 
235
+ ## Concurrent Indexing and Querying
236
+
237
+ The indexer (write) and the MCP daemon (read) use separate database paths:
238
+
239
+ - The indexer writes to `~/.codespine_db` with a 512 MB buffer pool.
240
+ - When indexing completes, `analyse` atomically copies the database to `~/.codespine_db_read` and touches a sentinel file.
241
+ - The MCP daemon and all read-only CLI commands open `~/.codespine_db_read` with a 128 MB buffer pool.
242
+ - The MCP daemon watches the sentinel file and silently reloads from the new snapshot on the next tool call — no restart needed.
243
+
244
+ Running `codespine analyse --deep --embed` on one project while querying a different one no longer causes buffer pool OOM or lock contention.
245
+
223
246
  ## Runtime Files
224
247
 
225
- - `~/.codespine_db` - graph database
248
+ - `~/.codespine_db` - graph database (write)
249
+ - `~/.codespine_db_read` - read replica used by MCP and CLI queries
250
+ - `~/.codespine_db_read.updated` - sentinel file; touched after each successful snapshot
226
251
  - `~/.codespine.pid` - MCP background server PID
227
252
  - `~/.codespine.log` - server log
228
253
  - `~/.codespine_embedding_cache.json` - embedding cache
@@ -233,8 +258,9 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
233
258
 
234
259
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
235
260
  - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
236
- - `codespine clear-index` rebuilds the local index database from scratch.
261
+ - `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
237
262
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
263
+ - The first run after upgrading to v0.5.7 will not have a read replica yet. Run `codespine analyse` once to create it.
238
264
 
239
265
  ## Project Docs
240
266
 
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.5.6"
4
+ __version__ = "0.5.8"
@@ -319,6 +319,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
319
319
  fg="green",
320
320
  )
321
321
 
322
+ # Publish a read replica so MCP and read-only CLI commands (search, stats…)
323
+ # run against an isolated snapshot rather than competing with the write
324
+ # process's buffer pool. The MCP daemon detects the sentinel file and
325
+ # hot-reloads without restarting.
326
+ snap_label = "Publishing read replica..."
327
+ _live_phase(snap_label, "copying")
328
+ store._recycle_conn()
329
+ snapped = GraphStore.snapshot_to_read_replica()
330
+ _finish_phase(snap_label, "MCP will reload automatically" if snapped else "skipped (source DB not found)")
331
+
322
332
 
323
333
  @main.command()
324
334
  @click.argument("query")
@@ -462,19 +472,43 @@ def stats(as_json: bool) -> None:
462
472
  "MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n", {"pid": pid}
463
473
  )
464
474
  classes = store.query_records(
465
- "MATCH (c:Class), (f:File) WHERE c.file_id = f.id AND f.project_id = $pid RETURN count(c) as n",
475
+ """
476
+ MATCH (f:File) WHERE f.project_id = $pid
477
+ WITH f
478
+ MATCH (c:Class) WHERE c.file_id = f.id
479
+ RETURN count(c) as n
480
+ """,
466
481
  {"pid": pid},
467
482
  )
468
483
  methods = store.query_records(
469
- "MATCH (m:Method), (c:Class), (f:File) WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid RETURN count(m) as n",
484
+ """
485
+ MATCH (f:File) WHERE f.project_id = $pid
486
+ WITH f
487
+ MATCH (c:Class) WHERE c.file_id = f.id
488
+ WITH c
489
+ MATCH (c)-[:HAS_METHOD]->(m:Method)
490
+ RETURN count(m) as n
491
+ """,
470
492
  {"pid": pid},
471
493
  )
472
494
  calls = store.query_records(
473
- "MATCH (ma:Method)-[:CALLS]->(mb:Method), (ca:Class), (fa:File) WHERE ma.class_id = ca.id AND ca.file_id = fa.id AND fa.project_id = $pid RETURN count(*) as n",
495
+ """
496
+ MATCH (f:File) WHERE f.project_id = $pid
497
+ WITH f
498
+ MATCH (c:Class) WHERE c.file_id = f.id
499
+ WITH c
500
+ MATCH (c)-[:HAS_METHOD]->(m:Method)-[:CALLS]->()
501
+ RETURN count(*) as n
502
+ """,
474
503
  {"pid": pid},
475
504
  )
476
505
  emb = store.query_records(
477
- "MATCH (s:Symbol), (f:File) WHERE s.file_id = f.id AND f.project_id = $pid AND s.embedding IS NOT NULL RETURN count(s) as n",
506
+ """
507
+ MATCH (f:File) WHERE f.project_id = $pid
508
+ WITH f
509
+ MATCH (s:Symbol) WHERE s.file_id = f.id AND s.embedding IS NOT NULL
510
+ RETURN count(s) as n
511
+ """,
478
512
  {"pid": pid},
479
513
  )
480
514
  rows.append({
@@ -5,6 +5,7 @@ from dataclasses import dataclass
5
5
  @dataclass(frozen=True)
6
6
  class Settings:
7
7
  db_path: str = os.path.expanduser("~/.codespine_db")
8
+ db_snapshot_path: str = os.path.expanduser("~/.codespine_db_read")
8
9
  pid_file: str = os.path.expanduser("~/.codespine.pid")
9
10
  log_file: str = os.path.expanduser("~/.codespine.log")
10
11
  embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")
@@ -18,7 +18,8 @@ from codespine.db.schema import ensure_schema
18
18
 
19
19
  LOGGER = logging.getLogger(__name__)
20
20
 
21
- _BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
21
+ _WRITE_BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
22
+ _READ_BUFFER_POOL_SIZE = 128 * 1024 * 1024 # 128 MB – point queries only; keep footprint small
22
23
  _RECOVERABLE_DB_ERROR_MARKERS = (
23
24
  "storage version mismatch",
24
25
  "catalog version mismatch",
@@ -35,11 +36,18 @@ class GraphStore:
35
36
  read_only: bool = False
36
37
 
37
38
  def __post_init__(self) -> None:
38
- db_path = SETTINGS.db_path
39
39
  self._tls: threading.local = threading.local()
40
40
  from codespine.overlay.store import OverlayStore
41
41
 
42
42
  self.overlay_store = OverlayStore()
43
+
44
+ # Read-only callers (MCP, CLI reads) use the read replica when available.
45
+ # This isolates them from the write process's buffer pool and WAL churn.
46
+ if self.read_only and os.path.exists(SETTINGS.db_snapshot_path):
47
+ db_path = SETTINGS.db_snapshot_path
48
+ else:
49
+ db_path = SETTINGS.db_path
50
+
43
51
  try:
44
52
  self.db = self._open_with_recovery(db_path)
45
53
  except Exception as exc:
@@ -50,11 +58,12 @@ class GraphStore:
50
58
  self._ensure_schema_with_recovery()
51
59
 
52
60
  def _open_db(self, path: str) -> kuzu.Database:
61
+ pool = _READ_BUFFER_POOL_SIZE if self.read_only else _WRITE_BUFFER_POOL_SIZE
53
62
  # Newer Kuzu versions accept read_only; fall back for older ones.
54
63
  try:
55
- return kuzu.Database(path, buffer_pool_size=_BUFFER_POOL_SIZE, read_only=self.read_only)
64
+ return kuzu.Database(path, buffer_pool_size=pool, read_only=self.read_only)
56
65
  except TypeError:
57
- return kuzu.Database(path, buffer_pool_size=_BUFFER_POOL_SIZE)
66
+ return kuzu.Database(path, buffer_pool_size=pool)
58
67
 
59
68
  @staticmethod
60
69
  def _is_recoverable_db_error(exc: Exception) -> bool:
@@ -553,6 +562,41 @@ class GraphStore:
553
562
  },
554
563
  )
555
564
 
565
+ @staticmethod
566
+ def snapshot_to_read_replica() -> bool:
567
+ """Atomically copy the write DB to the read-replica path.
568
+
569
+ The read replica is used by the MCP daemon and all read-only CLI
570
+ commands so they never contend with the write process's buffer pool.
571
+ Returns True on success, False if the source DB does not exist.
572
+ """
573
+ src = SETTINGS.db_path
574
+ dst = SETTINGS.db_snapshot_path
575
+ if not os.path.exists(src):
576
+ return False
577
+ tmp = dst + ".tmp"
578
+ try:
579
+ if os.path.exists(tmp):
580
+ shutil.rmtree(tmp, ignore_errors=True)
581
+ if os.path.isdir(src):
582
+ shutil.copytree(src, tmp)
583
+ else:
584
+ os.makedirs(os.path.dirname(dst) or ".", exist_ok=True)
585
+ shutil.copy2(src, tmp)
586
+ if os.path.exists(dst):
587
+ shutil.rmtree(dst, ignore_errors=True)
588
+ os.rename(tmp, dst)
589
+ # Sentinel: MCP daemon watches this file's mtime to know when to reload.
590
+ sentinel = dst + ".updated"
591
+ with open(sentinel, "w", encoding="utf-8") as f:
592
+ f.write(str(int(time.time())))
593
+ return True
594
+ except Exception as exc:
595
+ LOGGER.warning("Snapshot to read replica failed: %s", exc)
596
+ if os.path.exists(tmp):
597
+ shutil.rmtree(tmp, ignore_errors=True)
598
+ return False
599
+
556
600
  def query_records(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
557
601
  frame = self.execute(query, params or {}).get_as_df()
558
602
  if frame.empty:
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json as _json_mod
4
+ import logging
5
+ import os
4
6
  import subprocess
5
7
  import sys
6
8
  import tempfile
@@ -8,6 +10,10 @@ import time
8
10
 
9
11
  from fastmcp import FastMCP
10
12
 
13
+ from codespine.config import SETTINGS
14
+
15
+ _LOGGER = logging.getLogger(__name__)
16
+
11
17
  from codespine import __version__
12
18
  from codespine.analysis.community import detect_communities, symbol_community
13
19
  from codespine.analysis.context import build_symbol_context
@@ -129,7 +135,47 @@ def _staleness_meta(
129
135
  return _json(response)
130
136
 
131
137
 
138
+ class _StoreProxy:
139
+ """Wraps a GraphStore and hot-reloads from the read replica when the
140
+ post-analyse sentinel file is touched.
141
+
142
+ After `codespine analyse` finishes it copies the write DB to
143
+ ``~/.codespine_db_read`` and writes ``~/.codespine_db_read.updated``.
144
+ This proxy checks that sentinel's mtime before every attribute access and
145
+ silently swaps in a fresh read-only GraphStore so the MCP daemon picks up
146
+ the new index without restarting.
147
+ """
148
+
149
+ def __init__(self, store) -> None:
150
+ object.__setattr__(self, "_store", store)
151
+ object.__setattr__(self, "_sentinel", SETTINGS.db_snapshot_path + ".updated")
152
+ object.__setattr__(self, "_last_mtime", self._sentinel_mtime())
153
+
154
+ def _sentinel_mtime(self) -> float:
155
+ try:
156
+ return os.path.getmtime(object.__getattribute__(self, "_sentinel"))
157
+ except FileNotFoundError:
158
+ return 0.0
159
+
160
+ def _maybe_reload(self) -> None:
161
+ current = self._sentinel_mtime()
162
+ if current > object.__getattribute__(self, "_last_mtime"):
163
+ from codespine.db.store import GraphStore as _GS
164
+ try:
165
+ new_store = _GS(read_only=True)
166
+ object.__setattr__(self, "_store", new_store)
167
+ object.__setattr__(self, "_last_mtime", current)
168
+ _LOGGER.info("MCP: hot-reloaded GraphStore from updated snapshot")
169
+ except Exception as exc:
170
+ _LOGGER.warning("MCP: hot-reload failed: %s", exc)
171
+
172
+ def __getattr__(self, name: str):
173
+ self._maybe_reload()
174
+ return getattr(object.__getattribute__(self, "_store"), name)
175
+
176
+
132
177
  def build_mcp_server(store, repo_path_provider):
178
+ store = _StoreProxy(store)
133
179
  _raw_mcp = FastMCP("codespine")
134
180
  overlay_store = getattr(store, "overlay_store", None)
135
181
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.6
3
+ Version: 0.5.8
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -72,6 +72,8 @@ It indexes classes, methods, calls, type relationships, cross-module links, git
72
72
 
73
73
  It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
74
74
 
75
+ The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
76
+
75
77
  ## Why It Saves Tokens
76
78
 
77
79
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
@@ -101,6 +103,8 @@ pip install "codespine[ml]"
101
103
  - Community detection: structural clusters for architectural context
102
104
  - Change coupling: git-history-based file relationships
103
105
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
106
+ - Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
107
+ - Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
104
108
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
105
109
 
106
110
  ## Editing Without Stale Indexes
@@ -159,15 +163,23 @@ Parsing code... 8/8
159
163
  Tracing calls... 847 calls resolved
160
164
  Analyzing types... 234 type relationships
161
165
  Cross-module linking... skipped (single module)
166
+ Detecting communities... loading symbols
167
+ Detecting communities... 623 symbols, 1204 structural edges
168
+ Detecting communities... persisting 8/8 clusters
162
169
  Detecting communities... 8 clusters found
170
+ Detecting execution flows... 34 entry points, tracing
163
171
  Detecting execution flows... 34 processes found
164
172
  Finding dead code... 12 unreachable symbols
173
+ Analyzing git history... 18 commits, computing co-changes
165
174
  Analyzing git history... 18 coupled file pairs
166
175
  Generating embeddings... 0 vectors stored
167
176
 
168
177
  Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
178
+ Publishing read replica... MCP will reload automatically
169
179
  ```
170
180
 
181
+ Each analysis phase streams live progress in place. The final step publishes a read replica so the MCP daemon picks up the new index without restarting.
182
+
171
183
  Search the index:
172
184
 
173
185
  ```bash
@@ -284,9 +296,22 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
284
296
 
285
297
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
286
298
 
299
+ ## Concurrent Indexing and Querying
300
+
301
+ The indexer (write) and the MCP daemon (read) use separate database paths:
302
+
303
+ - The indexer writes to `~/.codespine_db` with a 512 MB buffer pool.
304
+ - When indexing completes, `analyse` atomically copies the database to `~/.codespine_db_read` and touches a sentinel file.
305
+ - The MCP daemon and all read-only CLI commands open `~/.codespine_db_read` with a 128 MB buffer pool.
306
+ - The MCP daemon watches the sentinel file and silently reloads from the new snapshot on the next tool call — no restart needed.
307
+
308
+ Running `codespine analyse --deep --embed` on one project while querying a different one no longer causes buffer pool OOM or lock contention.
309
+
287
310
  ## Runtime Files
288
311
 
289
- - `~/.codespine_db` - graph database
312
+ - `~/.codespine_db` - graph database (write)
313
+ - `~/.codespine_db_read` - read replica used by MCP and CLI queries
314
+ - `~/.codespine_db_read.updated` - sentinel file; touched after each successful snapshot
290
315
  - `~/.codespine.pid` - MCP background server PID
291
316
  - `~/.codespine.log` - server log
292
317
  - `~/.codespine_embedding_cache.json` - embedding cache
@@ -297,8 +322,9 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
297
322
 
298
323
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
299
324
  - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
300
- - `codespine clear-index` rebuilds the local index database from scratch.
325
+ - `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
301
326
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
327
+ - The first run after upgrading to v0.5.7 will not have a read replica yet. Run `codespine analyse` once to create it.
302
328
 
303
329
  ## Project Docs
304
330
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.5.6"
7
+ version = "0.5.8"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes