codespine 0.5.6__tar.gz → 0.5.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.5.6 → codespine-0.5.8}/PKG-INFO +29 -3
- {codespine-0.5.6 → codespine-0.5.8}/README.md +28 -2
- {codespine-0.5.6 → codespine-0.5.8}/codespine/__init__.py +1 -1
- {codespine-0.5.6 → codespine-0.5.8}/codespine/cli.py +38 -4
- {codespine-0.5.6 → codespine-0.5.8}/codespine/config.py +1 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/db/store.py +48 -4
- {codespine-0.5.6 → codespine-0.5.8}/codespine/mcp/server.py +46 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/PKG-INFO +29 -3
- {codespine-0.5.6 → codespine-0.5.8}/pyproject.toml +1 -1
- {codespine-0.5.6 → codespine-0.5.8}/LICENSE +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/community.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/context.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/coupling.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/crossmodule.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/flow.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/analysis/impact.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/db/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/db/schema.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/diff/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/engine.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/mcp/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/noise/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/noise/blocklist.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/git_state.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/merge.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/overlay/store.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/search/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/search/bm25.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/search/fuzzy.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/search/hybrid.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/search/rrf.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/search/vector.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/watch/__init__.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine/watch/watcher.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/gindex.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/setup.cfg +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_call_resolver.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_community_detection.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_deadcode.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_java_parser.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_multimodule_index.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_overlay.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_search_ranking.py +0 -0
- {codespine-0.5.6 → codespine-0.5.8}/tests/test_store_recovery.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codespine
|
|
3
|
-
Version: 0.5.6
|
|
3
|
+
Version: 0.5.8
|
|
4
4
|
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
5
|
Author: CodeSpine contributors
|
|
6
6
|
License: MIT License
|
|
@@ -72,6 +72,8 @@ It indexes classes, methods, calls, type relationships, cross-module links, git
|
|
|
72
72
|
|
|
73
73
|
It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
|
|
74
74
|
|
|
75
|
+
The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
|
|
76
|
+
|
|
75
77
|
## Why It Saves Tokens
|
|
76
78
|
|
|
77
79
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
@@ -101,6 +103,8 @@ pip install "codespine[ml]"
|
|
|
101
103
|
- Community detection: structural clusters for architectural context
|
|
102
104
|
- Change coupling: git-history-based file relationships
|
|
103
105
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
106
|
+
- Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
|
|
107
|
+
- Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
|
|
104
108
|
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
105
109
|
|
|
106
110
|
## Editing Without Stale Indexes
|
|
@@ -159,15 +163,23 @@ Parsing code... 8/8
|
|
|
159
163
|
Tracing calls... 847 calls resolved
|
|
160
164
|
Analyzing types... 234 type relationships
|
|
161
165
|
Cross-module linking... skipped (single module)
|
|
166
|
+
Detecting communities... loading symbols
|
|
167
|
+
Detecting communities... 623 symbols, 1204 structural edges
|
|
168
|
+
Detecting communities... persisting 8/8 clusters
|
|
162
169
|
Detecting communities... 8 clusters found
|
|
170
|
+
Detecting execution flows... 34 entry points, tracing
|
|
163
171
|
Detecting execution flows... 34 processes found
|
|
164
172
|
Finding dead code... 12 unreachable symbols
|
|
173
|
+
Analyzing git history... 18 commits, computing co-changes
|
|
165
174
|
Analyzing git history... 18 coupled file pairs
|
|
166
175
|
Generating embeddings... 0 vectors stored
|
|
167
176
|
|
|
168
177
|
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
|
|
178
|
+
Publishing read replica... MCP will reload automatically
|
|
169
179
|
```
|
|
170
180
|
|
|
181
|
+
Each analysis phase streams live progress in place. The final step publishes a read replica so the MCP daemon picks up the new index without restarting.
|
|
182
|
+
|
|
171
183
|
Search the index:
|
|
172
184
|
|
|
173
185
|
```bash
|
|
@@ -284,9 +296,22 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
|
|
|
284
296
|
|
|
285
297
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
286
298
|
|
|
299
|
+
## Concurrent Indexing and Querying
|
|
300
|
+
|
|
301
|
+
The indexer (write) and the MCP daemon (read) use separate database paths:
|
|
302
|
+
|
|
303
|
+
- The indexer writes to `~/.codespine_db` with a 512 MB buffer pool.
|
|
304
|
+
- When indexing completes, `analyse` copies the database to a temporary path, swaps that copy in as `~/.codespine_db_read`, and then updates a sentinel file.
|
|
305
|
+
- The MCP daemon and all read-only CLI commands open `~/.codespine_db_read` with a 128 MB buffer pool.
|
|
306
|
+
- The MCP daemon watches the sentinel file and silently reloads from the new snapshot on the next tool call — no restart needed.
|
|
307
|
+
|
|
308
|
+
Running `codespine analyse --deep --embed` on one project while querying a different one no longer causes buffer pool OOM or lock contention.
|
|
309
|
+
|
|
287
310
|
## Runtime Files
|
|
288
311
|
|
|
289
|
-
- `~/.codespine_db` - graph database
|
|
312
|
+
- `~/.codespine_db` - graph database (write)
|
|
313
|
+
- `~/.codespine_db_read` - read replica used by MCP and CLI queries
|
|
314
|
+
- `~/.codespine_db_read.updated` - sentinel file; touched after each successful snapshot
|
|
290
315
|
- `~/.codespine.pid` - MCP background server PID
|
|
291
316
|
- `~/.codespine.log` - server log
|
|
292
317
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
@@ -297,8 +322,9 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
|
|
|
297
322
|
|
|
298
323
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
299
324
|
- `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
|
|
300
|
-
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
325
|
+
- `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
|
|
301
326
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
327
|
+
- The first run after upgrading from a release older than v0.5.7 will not have a read replica yet; until one exists, read-only commands fall back to `~/.codespine_db`. Run `codespine analyse` once to create it.
|
|
302
328
|
|
|
303
329
|
## Project Docs
|
|
304
330
|
|
|
@@ -8,6 +8,8 @@ It indexes classes, methods, calls, type relationships, cross-module links, git
|
|
|
8
8
|
|
|
9
9
|
It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
|
|
10
10
|
|
|
11
|
+
The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
|
|
12
|
+
|
|
11
13
|
## Why It Saves Tokens
|
|
12
14
|
|
|
13
15
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
@@ -37,6 +39,8 @@ pip install "codespine[ml]"
|
|
|
37
39
|
- Community detection: structural clusters for architectural context
|
|
38
40
|
- Change coupling: git-history-based file relationships
|
|
39
41
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
42
|
+
- Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
|
|
43
|
+
- Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
|
|
40
44
|
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
41
45
|
|
|
42
46
|
## Editing Without Stale Indexes
|
|
@@ -95,15 +99,23 @@ Parsing code... 8/8
|
|
|
95
99
|
Tracing calls... 847 calls resolved
|
|
96
100
|
Analyzing types... 234 type relationships
|
|
97
101
|
Cross-module linking... skipped (single module)
|
|
102
|
+
Detecting communities... loading symbols
|
|
103
|
+
Detecting communities... 623 symbols, 1204 structural edges
|
|
104
|
+
Detecting communities... persisting 8/8 clusters
|
|
98
105
|
Detecting communities... 8 clusters found
|
|
106
|
+
Detecting execution flows... 34 entry points, tracing
|
|
99
107
|
Detecting execution flows... 34 processes found
|
|
100
108
|
Finding dead code... 12 unreachable symbols
|
|
109
|
+
Analyzing git history... 18 commits, computing co-changes
|
|
101
110
|
Analyzing git history... 18 coupled file pairs
|
|
102
111
|
Generating embeddings... 0 vectors stored
|
|
103
112
|
|
|
104
113
|
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
|
|
114
|
+
Publishing read replica... MCP will reload automatically
|
|
105
115
|
```
|
|
106
116
|
|
|
117
|
+
Each analysis phase streams live progress in place. The final step publishes a read replica so the MCP daemon picks up the new index without restarting.
|
|
118
|
+
|
|
107
119
|
Search the index:
|
|
108
120
|
|
|
109
121
|
```bash
|
|
@@ -220,9 +232,22 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
|
|
|
220
232
|
|
|
221
233
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
222
234
|
|
|
235
|
+
## Concurrent Indexing and Querying
|
|
236
|
+
|
|
237
|
+
The indexer (write) and the MCP daemon (read) use separate database paths:
|
|
238
|
+
|
|
239
|
+
- The indexer writes to `~/.codespine_db` with a 512 MB buffer pool.
|
|
240
|
+
- When indexing completes, `analyse` copies the database to a temporary path, swaps that copy in as `~/.codespine_db_read`, and then updates a sentinel file.
|
|
241
|
+
- The MCP daemon and all read-only CLI commands open `~/.codespine_db_read` with a 128 MB buffer pool.
|
|
242
|
+
- The MCP daemon watches the sentinel file and silently reloads from the new snapshot on the next tool call — no restart needed.
|
|
243
|
+
|
|
244
|
+
Running `codespine analyse --deep --embed` on one project while querying a different one no longer causes buffer pool OOM or lock contention.
|
|
245
|
+
|
|
223
246
|
## Runtime Files
|
|
224
247
|
|
|
225
|
-
- `~/.codespine_db` - graph database
|
|
248
|
+
- `~/.codespine_db` - graph database (write)
|
|
249
|
+
- `~/.codespine_db_read` - read replica used by MCP and CLI queries
|
|
250
|
+
- `~/.codespine_db_read.updated` - sentinel file; touched after each successful snapshot
|
|
226
251
|
- `~/.codespine.pid` - MCP background server PID
|
|
227
252
|
- `~/.codespine.log` - server log
|
|
228
253
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
@@ -233,8 +258,9 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
|
|
|
233
258
|
|
|
234
259
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
235
260
|
- `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
|
|
236
|
-
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
261
|
+
- `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
|
|
237
262
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
263
|
+
- The first run after upgrading from a release older than v0.5.7 will not have a read replica yet; until one exists, read-only commands fall back to `~/.codespine_db`. Run `codespine analyse` once to create it.
|
|
238
264
|
|
|
239
265
|
## Project Docs
|
|
240
266
|
|
|
@@ -319,6 +319,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
319
319
|
fg="green",
|
|
320
320
|
)
|
|
321
321
|
|
|
322
|
+
# Publish a read replica so MCP and read-only CLI commands (search, stats…)
|
|
323
|
+
# run against an isolated snapshot rather than competing with the write
|
|
324
|
+
# process's buffer pool. The MCP daemon detects the sentinel file and
|
|
325
|
+
# hot-reloads without restarting.
|
|
326
|
+
snap_label = "Publishing read replica..."
|
|
327
|
+
_live_phase(snap_label, "copying")
|
|
328
|
+
store._recycle_conn()
|
|
329
|
+
snapped = GraphStore.snapshot_to_read_replica()
|
|
330
|
+
_finish_phase(snap_label, "MCP will reload automatically" if snapped else "skipped (source DB not found)")
|
|
331
|
+
|
|
322
332
|
|
|
323
333
|
@main.command()
|
|
324
334
|
@click.argument("query")
|
|
@@ -462,19 +472,43 @@ def stats(as_json: bool) -> None:
|
|
|
462
472
|
"MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n", {"pid": pid}
|
|
463
473
|
)
|
|
464
474
|
classes = store.query_records(
|
|
465
|
-
"
|
|
475
|
+
"""
|
|
476
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
477
|
+
WITH f
|
|
478
|
+
MATCH (c:Class) WHERE c.file_id = f.id
|
|
479
|
+
RETURN count(c) as n
|
|
480
|
+
""",
|
|
466
481
|
{"pid": pid},
|
|
467
482
|
)
|
|
468
483
|
methods = store.query_records(
|
|
469
|
-
"
|
|
484
|
+
"""
|
|
485
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
486
|
+
WITH f
|
|
487
|
+
MATCH (c:Class) WHERE c.file_id = f.id
|
|
488
|
+
WITH c
|
|
489
|
+
MATCH (c)-[:HAS_METHOD]->(m:Method)
|
|
490
|
+
RETURN count(m) as n
|
|
491
|
+
""",
|
|
470
492
|
{"pid": pid},
|
|
471
493
|
)
|
|
472
494
|
calls = store.query_records(
|
|
473
|
-
"
|
|
495
|
+
"""
|
|
496
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
497
|
+
WITH f
|
|
498
|
+
MATCH (c:Class) WHERE c.file_id = f.id
|
|
499
|
+
WITH c
|
|
500
|
+
MATCH (c)-[:HAS_METHOD]->(m:Method)-[:CALLS]->()
|
|
501
|
+
RETURN count(*) as n
|
|
502
|
+
""",
|
|
474
503
|
{"pid": pid},
|
|
475
504
|
)
|
|
476
505
|
emb = store.query_records(
|
|
477
|
-
"
|
|
506
|
+
"""
|
|
507
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
508
|
+
WITH f
|
|
509
|
+
MATCH (s:Symbol) WHERE s.file_id = f.id AND s.embedding IS NOT NULL
|
|
510
|
+
RETURN count(s) as n
|
|
511
|
+
""",
|
|
478
512
|
{"pid": pid},
|
|
479
513
|
)
|
|
480
514
|
rows.append({
|
|
@@ -5,6 +5,7 @@ from dataclasses import dataclass
|
|
|
5
5
|
@dataclass(frozen=True)
|
|
6
6
|
class Settings:
|
|
7
7
|
db_path: str = os.path.expanduser("~/.codespine_db")
|
|
8
|
+
db_snapshot_path: str = os.path.expanduser("~/.codespine_db_read")
|
|
8
9
|
pid_file: str = os.path.expanduser("~/.codespine.pid")
|
|
9
10
|
log_file: str = os.path.expanduser("~/.codespine.log")
|
|
10
11
|
embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")
|
|
@@ -18,7 +18,8 @@ from codespine.db.schema import ensure_schema
|
|
|
18
18
|
|
|
19
19
|
LOGGER = logging.getLogger(__name__)
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
_WRITE_BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
|
|
22
|
+
_READ_BUFFER_POOL_SIZE = 128 * 1024 * 1024 # 128 MB – point queries only; keep footprint small
|
|
22
23
|
_RECOVERABLE_DB_ERROR_MARKERS = (
|
|
23
24
|
"storage version mismatch",
|
|
24
25
|
"catalog version mismatch",
|
|
@@ -35,11 +36,18 @@ class GraphStore:
|
|
|
35
36
|
read_only: bool = False
|
|
36
37
|
|
|
37
38
|
def __post_init__(self) -> None:
|
|
38
|
-
db_path = SETTINGS.db_path
|
|
39
39
|
self._tls: threading.local = threading.local()
|
|
40
40
|
from codespine.overlay.store import OverlayStore
|
|
41
41
|
|
|
42
42
|
self.overlay_store = OverlayStore()
|
|
43
|
+
|
|
44
|
+
# Read-only callers (MCP, CLI reads) use the read replica when available.
|
|
45
|
+
# This isolates them from the write process's buffer pool and WAL churn.
|
|
46
|
+
if self.read_only and os.path.exists(SETTINGS.db_snapshot_path):
|
|
47
|
+
db_path = SETTINGS.db_snapshot_path
|
|
48
|
+
else:
|
|
49
|
+
db_path = SETTINGS.db_path
|
|
50
|
+
|
|
43
51
|
try:
|
|
44
52
|
self.db = self._open_with_recovery(db_path)
|
|
45
53
|
except Exception as exc:
|
|
@@ -50,11 +58,12 @@ class GraphStore:
|
|
|
50
58
|
self._ensure_schema_with_recovery()
|
|
51
59
|
|
|
52
60
|
def _open_db(self, path: str) -> kuzu.Database:
|
|
61
|
+
pool = _READ_BUFFER_POOL_SIZE if self.read_only else _WRITE_BUFFER_POOL_SIZE
|
|
53
62
|
# Newer Kuzu versions accept read_only; fall back for older ones.
|
|
54
63
|
try:
|
|
55
|
-
return kuzu.Database(path, buffer_pool_size=
|
|
64
|
+
return kuzu.Database(path, buffer_pool_size=pool, read_only=self.read_only)
|
|
56
65
|
except TypeError:
|
|
57
|
-
return kuzu.Database(path, buffer_pool_size=
|
|
66
|
+
return kuzu.Database(path, buffer_pool_size=pool)
|
|
58
67
|
|
|
59
68
|
@staticmethod
|
|
60
69
|
def _is_recoverable_db_error(exc: Exception) -> bool:
|
|
@@ -553,6 +562,41 @@ class GraphStore:
|
|
|
553
562
|
},
|
|
554
563
|
)
|
|
555
564
|
|
|
565
|
+
@staticmethod
|
|
566
|
+
def snapshot_to_read_replica() -> bool:
|
|
567
|
+
"""Atomically copy the write DB to the read-replica path.
|
|
568
|
+
|
|
569
|
+
The read replica is used by the MCP daemon and all read-only CLI
|
|
570
|
+
commands so they never contend with the write process's buffer pool.
|
|
571
|
+
Returns True on success, False if the source DB does not exist.
|
|
572
|
+
"""
|
|
573
|
+
src = SETTINGS.db_path
|
|
574
|
+
dst = SETTINGS.db_snapshot_path
|
|
575
|
+
if not os.path.exists(src):
|
|
576
|
+
return False
|
|
577
|
+
tmp = dst + ".tmp"
|
|
578
|
+
try:
|
|
579
|
+
if os.path.exists(tmp):
|
|
580
|
+
shutil.rmtree(tmp, ignore_errors=True)
|
|
581
|
+
if os.path.isdir(src):
|
|
582
|
+
shutil.copytree(src, tmp)
|
|
583
|
+
else:
|
|
584
|
+
os.makedirs(os.path.dirname(dst) or ".", exist_ok=True)
|
|
585
|
+
shutil.copy2(src, tmp)
|
|
586
|
+
if os.path.exists(dst):
|
|
587
|
+
shutil.rmtree(dst, ignore_errors=True)
|
|
588
|
+
os.rename(tmp, dst)
|
|
589
|
+
# Sentinel: MCP daemon watches this file's mtime to know when to reload.
|
|
590
|
+
sentinel = dst + ".updated"
|
|
591
|
+
with open(sentinel, "w", encoding="utf-8") as f:
|
|
592
|
+
f.write(str(int(time.time())))
|
|
593
|
+
return True
|
|
594
|
+
except Exception as exc:
|
|
595
|
+
LOGGER.warning("Snapshot to read replica failed: %s", exc)
|
|
596
|
+
if os.path.exists(tmp):
|
|
597
|
+
shutil.rmtree(tmp, ignore_errors=True)
|
|
598
|
+
return False
|
|
599
|
+
|
|
556
600
|
def query_records(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
|
|
557
601
|
frame = self.execute(query, params or {}).get_as_df()
|
|
558
602
|
if frame.empty:
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json as _json_mod
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
4
6
|
import subprocess
|
|
5
7
|
import sys
|
|
6
8
|
import tempfile
|
|
@@ -8,6 +10,10 @@ import time
|
|
|
8
10
|
|
|
9
11
|
from fastmcp import FastMCP
|
|
10
12
|
|
|
13
|
+
from codespine.config import SETTINGS
|
|
14
|
+
|
|
15
|
+
_LOGGER = logging.getLogger(__name__)
|
|
16
|
+
|
|
11
17
|
from codespine import __version__
|
|
12
18
|
from codespine.analysis.community import detect_communities, symbol_community
|
|
13
19
|
from codespine.analysis.context import build_symbol_context
|
|
@@ -129,7 +135,47 @@ def _staleness_meta(
|
|
|
129
135
|
return _json(response)
|
|
130
136
|
|
|
131
137
|
|
|
138
|
+
class _StoreProxy:
|
|
139
|
+
"""Wraps a GraphStore and hot-reloads from the read replica when the
|
|
140
|
+
post-analyse sentinel file is touched.
|
|
141
|
+
|
|
142
|
+
After `codespine analyse` finishes it copies the write DB to
|
|
143
|
+
``~/.codespine_db_read`` and writes ``~/.codespine_db_read.updated``.
|
|
144
|
+
This proxy checks that sentinel's mtime before every attribute access and
|
|
145
|
+
silently swaps in a fresh read-only GraphStore so the MCP daemon picks up
|
|
146
|
+
the new index without restarting.
|
|
147
|
+
"""
|
|
148
|
+
|
|
149
|
+
def __init__(self, store) -> None:
|
|
150
|
+
object.__setattr__(self, "_store", store)
|
|
151
|
+
object.__setattr__(self, "_sentinel", SETTINGS.db_snapshot_path + ".updated")
|
|
152
|
+
object.__setattr__(self, "_last_mtime", self._sentinel_mtime())
|
|
153
|
+
|
|
154
|
+
def _sentinel_mtime(self) -> float:
|
|
155
|
+
try:
|
|
156
|
+
return os.path.getmtime(object.__getattribute__(self, "_sentinel"))
|
|
157
|
+
except FileNotFoundError:
|
|
158
|
+
return 0.0
|
|
159
|
+
|
|
160
|
+
def _maybe_reload(self) -> None:
|
|
161
|
+
current = self._sentinel_mtime()
|
|
162
|
+
if current > object.__getattribute__(self, "_last_mtime"):
|
|
163
|
+
from codespine.db.store import GraphStore as _GS
|
|
164
|
+
try:
|
|
165
|
+
new_store = _GS(read_only=True)
|
|
166
|
+
object.__setattr__(self, "_store", new_store)
|
|
167
|
+
object.__setattr__(self, "_last_mtime", current)
|
|
168
|
+
_LOGGER.info("MCP: hot-reloaded GraphStore from updated snapshot")
|
|
169
|
+
except Exception as exc:
|
|
170
|
+
_LOGGER.warning("MCP: hot-reload failed: %s", exc)
|
|
171
|
+
|
|
172
|
+
def __getattr__(self, name: str):
|
|
173
|
+
self._maybe_reload()
|
|
174
|
+
return getattr(object.__getattribute__(self, "_store"), name)
|
|
175
|
+
|
|
176
|
+
|
|
132
177
|
def build_mcp_server(store, repo_path_provider):
|
|
178
|
+
store = _StoreProxy(store)
|
|
133
179
|
_raw_mcp = FastMCP("codespine")
|
|
134
180
|
overlay_store = getattr(store, "overlay_store", None)
|
|
135
181
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codespine
|
|
3
|
-
Version: 0.5.6
|
|
3
|
+
Version: 0.5.8
|
|
4
4
|
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
5
|
Author: CodeSpine contributors
|
|
6
6
|
License: MIT License
|
|
@@ -72,6 +72,8 @@ It indexes classes, methods, calls, type relationships, cross-module links, git
|
|
|
72
72
|
|
|
73
73
|
It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
|
|
74
74
|
|
|
75
|
+
The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
|
|
76
|
+
|
|
75
77
|
## Why It Saves Tokens
|
|
76
78
|
|
|
77
79
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
@@ -101,6 +103,8 @@ pip install "codespine[ml]"
|
|
|
101
103
|
- Community detection: structural clusters for architectural context
|
|
102
104
|
- Change coupling: git-history-based file relationships
|
|
103
105
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
106
|
+
- Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
|
|
107
|
+
- Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
|
|
104
108
|
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
105
109
|
|
|
106
110
|
## Editing Without Stale Indexes
|
|
@@ -159,15 +163,23 @@ Parsing code... 8/8
|
|
|
159
163
|
Tracing calls... 847 calls resolved
|
|
160
164
|
Analyzing types... 234 type relationships
|
|
161
165
|
Cross-module linking... skipped (single module)
|
|
166
|
+
Detecting communities... loading symbols
|
|
167
|
+
Detecting communities... 623 symbols, 1204 structural edges
|
|
168
|
+
Detecting communities... persisting 8/8 clusters
|
|
162
169
|
Detecting communities... 8 clusters found
|
|
170
|
+
Detecting execution flows... 34 entry points, tracing
|
|
163
171
|
Detecting execution flows... 34 processes found
|
|
164
172
|
Finding dead code... 12 unreachable symbols
|
|
173
|
+
Analyzing git history... 18 commits, computing co-changes
|
|
165
174
|
Analyzing git history... 18 coupled file pairs
|
|
166
175
|
Generating embeddings... 0 vectors stored
|
|
167
176
|
|
|
168
177
|
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
|
|
178
|
+
Publishing read replica... MCP will reload automatically
|
|
169
179
|
```
|
|
170
180
|
|
|
181
|
+
Each analysis phase streams live progress in place. The final step publishes a read replica so the MCP daemon picks up the new index without restarting.
|
|
182
|
+
|
|
171
183
|
Search the index:
|
|
172
184
|
|
|
173
185
|
```bash
|
|
@@ -284,9 +296,22 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
|
|
|
284
296
|
|
|
285
297
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
286
298
|
|
|
299
|
+
## Concurrent Indexing and Querying
|
|
300
|
+
|
|
301
|
+
The indexer (write) and the MCP daemon (read) use separate database paths:
|
|
302
|
+
|
|
303
|
+
- The indexer writes to `~/.codespine_db` with a 512 MB buffer pool.
|
|
304
|
+
- When indexing completes, `analyse` copies the database to a temporary path, swaps that copy in as `~/.codespine_db_read`, and then updates a sentinel file.
|
|
305
|
+
- The MCP daemon and all read-only CLI commands open `~/.codespine_db_read` with a 128 MB buffer pool.
|
|
306
|
+
- The MCP daemon watches the sentinel file and silently reloads from the new snapshot on the next tool call — no restart needed.
|
|
307
|
+
|
|
308
|
+
Running `codespine analyse --deep --embed` on one project while querying a different one no longer causes buffer pool OOM or lock contention.
|
|
309
|
+
|
|
287
310
|
## Runtime Files
|
|
288
311
|
|
|
289
|
-
- `~/.codespine_db` - graph database
|
|
312
|
+
- `~/.codespine_db` - graph database (write)
|
|
313
|
+
- `~/.codespine_db_read` - read replica used by MCP and CLI queries
|
|
314
|
+
- `~/.codespine_db_read.updated` - sentinel file; touched after each successful snapshot
|
|
290
315
|
- `~/.codespine.pid` - MCP background server PID
|
|
291
316
|
- `~/.codespine.log` - server log
|
|
292
317
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
@@ -297,8 +322,9 @@ When a dirty overlay exists, deep-analysis results intentionally exclude those u
|
|
|
297
322
|
|
|
298
323
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
299
324
|
- `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
|
|
300
|
-
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
325
|
+
- `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
|
|
301
326
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
327
|
+
- The first run after upgrading from a release older than v0.5.7 will not have a read replica yet; until one exists, read-only commands fall back to `~/.codespine_db`. Run `codespine analyse` once to create it.
|
|
302
328
|
|
|
303
329
|
## Project Docs
|
|
304
330
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|