codespine 0.5.5__tar.gz → 0.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.5.5 → codespine-0.5.7}/PKG-INFO +38 -1
- {codespine-0.5.5 → codespine-0.5.7}/README.md +37 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/__init__.py +1 -1
- {codespine-0.5.5 → codespine-0.5.7}/codespine/cli.py +38 -4
- {codespine-0.5.5 → codespine-0.5.7}/codespine/config.py +1 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/db/store.py +98 -15
- {codespine-0.5.5 → codespine-0.5.7}/codespine/mcp/server.py +46 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/PKG-INFO +38 -1
- {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/SOURCES.txt +2 -1
- {codespine-0.5.5 → codespine-0.5.7}/pyproject.toml +1 -1
- codespine-0.5.7/tests/test_store_recovery.py +52 -0
- {codespine-0.5.5 → codespine-0.5.7}/LICENSE +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/community.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/context.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/coupling.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/crossmodule.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/flow.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/analysis/impact.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/db/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/db/schema.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/diff/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/engine.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/mcp/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/noise/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/noise/blocklist.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/git_state.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/merge.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/overlay/store.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/search/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/search/bm25.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/search/fuzzy.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/search/hybrid.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/search/rrf.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/search/vector.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/watch/__init__.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine/watch/watcher.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/gindex.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/setup.cfg +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_call_resolver.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_community_detection.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_deadcode.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_java_parser.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_multimodule_index.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_overlay.py +0 -0
- {codespine-0.5.5 → codespine-0.5.7}/tests/test_search_ranking.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codespine
|
|
3
|
-
Version: 0.5.5
|
|
3
|
+
Version: 0.5.7
|
|
4
4
|
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
5
|
Author: CodeSpine contributors
|
|
6
6
|
License: MIT License
|
|
@@ -70,12 +70,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
|
|
|
70
70
|
|
|
71
71
|
It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
72
72
|
|
|
73
|
+
It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
|
|
74
|
+
|
|
73
75
|
## Why It Saves Tokens
|
|
74
76
|
|
|
75
77
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
76
78
|
- Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
|
|
77
79
|
- Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
|
|
78
80
|
- Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
|
|
81
|
+
- Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
|
|
79
82
|
|
|
80
83
|
## Install
|
|
81
84
|
|
|
@@ -100,6 +103,32 @@ pip install "codespine[ml]"
|
|
|
100
103
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
101
104
|
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
102
105
|
|
|
106
|
+
## Editing Without Stale Indexes
|
|
107
|
+
|
|
108
|
+
CodeSpine uses a two-layer model:
|
|
109
|
+
|
|
110
|
+
- Base index: last committed state
|
|
111
|
+
- Dirty overlay: uncommitted Java changes
|
|
112
|
+
|
|
113
|
+
Fast tools read merged `base + overlay` state by default:
|
|
114
|
+
|
|
115
|
+
- `search`
|
|
116
|
+
- `context`
|
|
117
|
+
- `impact`
|
|
118
|
+
- MCP `search_hybrid`
|
|
119
|
+
- MCP `find_symbol`
|
|
120
|
+
- MCP `get_symbol_context`
|
|
121
|
+
- MCP `get_impact`
|
|
122
|
+
|
|
123
|
+
Deep analyses stay committed-only until promotion:
|
|
124
|
+
|
|
125
|
+
- `deadcode`
|
|
126
|
+
- `flow`
|
|
127
|
+
- `community`
|
|
128
|
+
- `coupling`
|
|
129
|
+
|
|
130
|
+
`codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
|
|
131
|
+
|
|
103
132
|
## Quick Start
|
|
104
133
|
|
|
105
134
|
Index a repo:
|
|
@@ -205,6 +234,7 @@ codespine analyse <path> --full
|
|
|
205
234
|
codespine analyse <path> --deep
|
|
206
235
|
codespine analyse <path> --embed
|
|
207
236
|
codespine watch --path .
|
|
237
|
+
codespine watch --path . --overlay-debounce-ms 1500
|
|
208
238
|
codespine search "query"
|
|
209
239
|
codespine context "symbol"
|
|
210
240
|
codespine impact "symbol"
|
|
@@ -215,6 +245,9 @@ codespine coupling
|
|
|
215
245
|
codespine diff main..feature
|
|
216
246
|
codespine stats
|
|
217
247
|
codespine list
|
|
248
|
+
codespine overlay-status
|
|
249
|
+
codespine overlay-promote
|
|
250
|
+
codespine overlay-clear
|
|
218
251
|
codespine clear-project <project_id>
|
|
219
252
|
codespine clear-index
|
|
220
253
|
```
|
|
@@ -247,6 +280,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
|
|
|
247
280
|
|
|
248
281
|
Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
|
|
249
282
|
|
|
283
|
+
When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
|
|
284
|
+
|
|
250
285
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
251
286
|
|
|
252
287
|
## Runtime Files
|
|
@@ -256,10 +291,12 @@ Use it when you want architecture-level context. Skip it when you just need the
|
|
|
256
291
|
- `~/.codespine.log` - server log
|
|
257
292
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
258
293
|
- `~/.codespine_index_meta/` - incremental file metadata cache
|
|
294
|
+
- `~/.codespine_overlay/` - uncommitted dirty overlay state
|
|
259
295
|
|
|
260
296
|
## Notes
|
|
261
297
|
|
|
262
298
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
299
|
+
- `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
|
|
263
300
|
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
264
301
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
265
302
|
|
|
@@ -6,12 +6,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
|
|
|
6
6
|
|
|
7
7
|
It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
8
8
|
|
|
9
|
+
It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
|
|
10
|
+
|
|
9
11
|
## Why It Saves Tokens
|
|
10
12
|
|
|
11
13
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
12
14
|
- Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
|
|
13
15
|
- Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
|
|
14
16
|
- Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
|
|
17
|
+
- Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
|
|
15
18
|
|
|
16
19
|
## Install
|
|
17
20
|
|
|
@@ -36,6 +39,32 @@ pip install "codespine[ml]"
|
|
|
36
39
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
37
40
|
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
38
41
|
|
|
42
|
+
## Editing Without Stale Indexes
|
|
43
|
+
|
|
44
|
+
CodeSpine uses a two-layer model:
|
|
45
|
+
|
|
46
|
+
- Base index: last committed state
|
|
47
|
+
- Dirty overlay: uncommitted Java changes
|
|
48
|
+
|
|
49
|
+
Fast tools read merged `base + overlay` state by default:
|
|
50
|
+
|
|
51
|
+
- `search`
|
|
52
|
+
- `context`
|
|
53
|
+
- `impact`
|
|
54
|
+
- MCP `search_hybrid`
|
|
55
|
+
- MCP `find_symbol`
|
|
56
|
+
- MCP `get_symbol_context`
|
|
57
|
+
- MCP `get_impact`
|
|
58
|
+
|
|
59
|
+
Deep analyses stay committed-only until promotion:
|
|
60
|
+
|
|
61
|
+
- `deadcode`
|
|
62
|
+
- `flow`
|
|
63
|
+
- `community`
|
|
64
|
+
- `coupling`
|
|
65
|
+
|
|
66
|
+
`codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
|
|
67
|
+
|
|
39
68
|
## Quick Start
|
|
40
69
|
|
|
41
70
|
Index a repo:
|
|
@@ -141,6 +170,7 @@ codespine analyse <path> --full
|
|
|
141
170
|
codespine analyse <path> --deep
|
|
142
171
|
codespine analyse <path> --embed
|
|
143
172
|
codespine watch --path .
|
|
173
|
+
codespine watch --path . --overlay-debounce-ms 1500
|
|
144
174
|
codespine search "query"
|
|
145
175
|
codespine context "symbol"
|
|
146
176
|
codespine impact "symbol"
|
|
@@ -151,6 +181,9 @@ codespine coupling
|
|
|
151
181
|
codespine diff main..feature
|
|
152
182
|
codespine stats
|
|
153
183
|
codespine list
|
|
184
|
+
codespine overlay-status
|
|
185
|
+
codespine overlay-promote
|
|
186
|
+
codespine overlay-clear
|
|
154
187
|
codespine clear-project <project_id>
|
|
155
188
|
codespine clear-index
|
|
156
189
|
```
|
|
@@ -183,6 +216,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
|
|
|
183
216
|
|
|
184
217
|
Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
|
|
185
218
|
|
|
219
|
+
When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
|
|
220
|
+
|
|
186
221
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
187
222
|
|
|
188
223
|
## Runtime Files
|
|
@@ -192,10 +227,12 @@ Use it when you want architecture-level context. Skip it when you just need the
|
|
|
192
227
|
- `~/.codespine.log` - server log
|
|
193
228
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
194
229
|
- `~/.codespine_index_meta/` - incremental file metadata cache
|
|
230
|
+
- `~/.codespine_overlay/` - uncommitted dirty overlay state
|
|
195
231
|
|
|
196
232
|
## Notes
|
|
197
233
|
|
|
198
234
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
235
|
+
- `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
|
|
199
236
|
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
200
237
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
201
238
|
|
|
@@ -319,6 +319,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
319
319
|
fg="green",
|
|
320
320
|
)
|
|
321
321
|
|
|
322
|
+
# Publish a read replica so MCP and read-only CLI commands (search, stats…)
|
|
323
|
+
# run against an isolated snapshot rather than competing with the write
|
|
324
|
+
# process's buffer pool. The MCP daemon detects the sentinel file and
|
|
325
|
+
# hot-reloads without restarting.
|
|
326
|
+
snap_label = "Publishing read replica..."
|
|
327
|
+
_live_phase(snap_label, "copying")
|
|
328
|
+
store._recycle_conn()
|
|
329
|
+
snapped = GraphStore.snapshot_to_read_replica()
|
|
330
|
+
_finish_phase(snap_label, "MCP will reload automatically" if snapped else "skipped (source DB not found)")
|
|
331
|
+
|
|
322
332
|
|
|
323
333
|
@main.command()
|
|
324
334
|
@click.argument("query")
|
|
@@ -462,19 +472,43 @@ def stats(as_json: bool) -> None:
|
|
|
462
472
|
"MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n", {"pid": pid}
|
|
463
473
|
)
|
|
464
474
|
classes = store.query_records(
|
|
465
|
-
"
|
|
475
|
+
"""
|
|
476
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
477
|
+
WITH f
|
|
478
|
+
MATCH (c:Class) WHERE c.file_id = f.id
|
|
479
|
+
RETURN count(c) as n
|
|
480
|
+
""",
|
|
466
481
|
{"pid": pid},
|
|
467
482
|
)
|
|
468
483
|
methods = store.query_records(
|
|
469
|
-
"
|
|
484
|
+
"""
|
|
485
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
486
|
+
WITH f
|
|
487
|
+
MATCH (c:Class) WHERE c.file_id = f.id
|
|
488
|
+
WITH c
|
|
489
|
+
MATCH (c)-[:HAS_METHOD]->(m:Method)
|
|
490
|
+
RETURN count(m) as n
|
|
491
|
+
""",
|
|
470
492
|
{"pid": pid},
|
|
471
493
|
)
|
|
472
494
|
calls = store.query_records(
|
|
473
|
-
"
|
|
495
|
+
"""
|
|
496
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
497
|
+
WITH f
|
|
498
|
+
MATCH (c:Class) WHERE c.file_id = f.id
|
|
499
|
+
WITH c
|
|
500
|
+
MATCH (c)-[:HAS_METHOD]->(m:Method)-[:CALLS]->()
|
|
501
|
+
RETURN count(*) as n
|
|
502
|
+
""",
|
|
474
503
|
{"pid": pid},
|
|
475
504
|
)
|
|
476
505
|
emb = store.query_records(
|
|
477
|
-
"
|
|
506
|
+
"""
|
|
507
|
+
MATCH (f:File) WHERE f.project_id = $pid
|
|
508
|
+
WITH f
|
|
509
|
+
MATCH (s:Symbol) WHERE s.file_id = f.id AND s.embedding IS NOT NULL
|
|
510
|
+
RETURN count(s) as n
|
|
511
|
+
""",
|
|
478
512
|
{"pid": pid},
|
|
479
513
|
)
|
|
480
514
|
rows.append({
|
|
@@ -5,6 +5,7 @@ from dataclasses import dataclass
|
|
|
5
5
|
@dataclass(frozen=True)
|
|
6
6
|
class Settings:
|
|
7
7
|
db_path: str = os.path.expanduser("~/.codespine_db")
|
|
8
|
+
db_snapshot_path: str = os.path.expanduser("~/.codespine_db_read")
|
|
8
9
|
pid_file: str = os.path.expanduser("~/.codespine.pid")
|
|
9
10
|
log_file: str = os.path.expanduser("~/.codespine.log")
|
|
10
11
|
embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")
|
|
@@ -18,7 +18,17 @@ from codespine.db.schema import ensure_schema
|
|
|
18
18
|
|
|
19
19
|
LOGGER = logging.getLogger(__name__)
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
_WRITE_BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
|
|
22
|
+
_READ_BUFFER_POOL_SIZE = 128 * 1024 * 1024 # 128 MB – point queries only; keep footprint small
|
|
23
|
+
_RECOVERABLE_DB_ERROR_MARKERS = (
|
|
24
|
+
"storage version mismatch",
|
|
25
|
+
"catalog version mismatch",
|
|
26
|
+
"database version is not supported",
|
|
27
|
+
"wal version mismatch",
|
|
28
|
+
"corrupt",
|
|
29
|
+
"corrupted",
|
|
30
|
+
"invalid database",
|
|
31
|
+
)
|
|
22
32
|
|
|
23
33
|
|
|
24
34
|
@dataclass
|
|
@@ -26,26 +36,70 @@ class GraphStore:
|
|
|
26
36
|
read_only: bool = False
|
|
27
37
|
|
|
28
38
|
def __post_init__(self) -> None:
|
|
29
|
-
db_path = SETTINGS.db_path
|
|
30
39
|
self._tls: threading.local = threading.local()
|
|
31
40
|
from codespine.overlay.store import OverlayStore
|
|
32
41
|
|
|
33
42
|
self.overlay_store = OverlayStore()
|
|
43
|
+
|
|
44
|
+
# Read-only callers (MCP, CLI reads) use the read replica when available.
|
|
45
|
+
# This isolates them from the write process's buffer pool and WAL churn.
|
|
46
|
+
if self.read_only and os.path.exists(SETTINGS.db_snapshot_path):
|
|
47
|
+
db_path = SETTINGS.db_snapshot_path
|
|
48
|
+
else:
|
|
49
|
+
db_path = SETTINGS.db_path
|
|
50
|
+
|
|
34
51
|
try:
|
|
35
|
-
self.db = self.
|
|
52
|
+
self.db = self._open_with_recovery(db_path)
|
|
36
53
|
except Exception as exc:
|
|
37
54
|
fallback = os.path.join("/tmp", ".codespine_db")
|
|
38
55
|
LOGGER.warning("Primary DB path failed (%s). Falling back to %s", exc, fallback)
|
|
39
|
-
self.db = self.
|
|
56
|
+
self.db = self._open_with_recovery(fallback)
|
|
40
57
|
if not self.read_only:
|
|
41
|
-
|
|
58
|
+
self._ensure_schema_with_recovery()
|
|
42
59
|
|
|
43
60
|
def _open_db(self, path: str) -> kuzu.Database:
|
|
61
|
+
pool = _READ_BUFFER_POOL_SIZE if self.read_only else _WRITE_BUFFER_POOL_SIZE
|
|
44
62
|
# Newer Kuzu versions accept read_only; fall back for older ones.
|
|
45
63
|
try:
|
|
46
|
-
return kuzu.Database(path, buffer_pool_size=
|
|
64
|
+
return kuzu.Database(path, buffer_pool_size=pool, read_only=self.read_only)
|
|
47
65
|
except TypeError:
|
|
48
|
-
return kuzu.Database(path, buffer_pool_size=
|
|
66
|
+
return kuzu.Database(path, buffer_pool_size=pool)
|
|
67
|
+
|
|
68
|
+
@staticmethod
def _is_recoverable_db_error(exc: Exception) -> bool:
    """Report whether *exc* carries one of the recoverable-error markers.

    The exception's text is lower-cased and scanned for each substring
    listed in ``_RECOVERABLE_DB_ERROR_MARKERS``; the first hit wins.
    """
    lowered = str(exc).lower()
    for marker in _RECOVERABLE_DB_ERROR_MARKERS:
        if marker in lowered:
            return True
    return False
|
|
72
|
+
|
|
73
|
+
@staticmethod
def _remove_db_path(path: str) -> None:
    """Delete whatever lives at *path*, be it a directory tree or a file.

    Directory removal is best-effort (errors are ignored); removing a
    plain file lets ``os.remove`` raise. A missing path is a no-op.
    """
    if os.path.isdir(path):
        shutil.rmtree(path, ignore_errors=True)
        return
    if os.path.exists(path):
        os.remove(path)
|
|
79
|
+
|
|
80
|
+
def _open_with_recovery(self, path: str) -> kuzu.Database:
    """Open the Kuzu database at *path*, rebuilding it when it is unusable.

    The first attempt goes through ``_open_db``. If it fails with an error
    that ``_is_recoverable_db_error`` recognises and this store is
    writable, the on-disk database is deleted and opened a second time.

    Read-only stores never delete anything. A read-only open is not
    expected to be able to create a replacement database, so wiping the
    files would only destroy the writer's index (or the read replica)
    without making this open succeed. The error is re-raised instead.

    Raises:
        Exception: the original error from ``_open_db`` when it is not
            recoverable or the store is read-only, or whatever the second
            open attempt raises.
    """
    try:
        return self._open_db(path)
    except Exception as exc:
        if self.read_only or not self._is_recoverable_db_error(exc):
            raise
        LOGGER.warning("Removing corrupted or incompatible Kuzu DB at %s: %s", path, exc)
        self._remove_db_path(path)
        # Start from fresh thread-local state so nothing cached against the
        # discarded database is reused (presumably per-thread connections).
        self._tls = threading.local()
        return self._open_db(path)
|
|
90
|
+
|
|
91
|
+
def _ensure_schema_with_recovery(self) -> None:
    """Run ``ensure_schema``; on a recoverable failure rebuild the DB and retry.

    Errors that ``_is_recoverable_db_error`` does not recognise are
    re-raised untouched. Otherwise the database files are removed, the
    database is reopened at the same location, thread-local state is reset
    and ``ensure_schema`` is run once more (its errors then propagate).
    """
    try:
        ensure_schema(self._conn())
    except Exception as exc:
        if not self._is_recoverable_db_error(exc):
            raise
        # Prefer the location reported by the open handle; fall back to the
        # configured primary path when the handle has no such attribute.
        db_location = getattr(self.db, "database_path", SETTINGS.db_path)
        LOGGER.warning(
            "Rebuilding corrupted or incompatible Kuzu DB at %s during schema init: %s",
            db_location,
            exc,
        )
        self._remove_db_path(db_location)
        self.db = self._open_db(db_location)
        self._tls = threading.local()
        ensure_schema(self._conn())
|
|
49
103
|
|
|
50
104
|
def _conn(self) -> kuzu.Connection:
|
|
51
105
|
"""Return the per-thread Kuzu connection, creating it lazily."""
|
|
@@ -447,16 +501,10 @@ class GraphStore:
|
|
|
447
501
|
self._recycle_conn()
|
|
448
502
|
path = SETTINGS.db_path
|
|
449
503
|
try:
|
|
450
|
-
|
|
451
|
-
shutil.rmtree(path, ignore_errors=True)
|
|
452
|
-
elif os.path.exists(path):
|
|
453
|
-
os.remove(path)
|
|
504
|
+
self._remove_db_path(path)
|
|
454
505
|
except OSError:
|
|
455
506
|
fallback = os.path.join("/tmp", ".codespine_db")
|
|
456
|
-
|
|
457
|
-
shutil.rmtree(fallback, ignore_errors=True)
|
|
458
|
-
elif os.path.exists(fallback):
|
|
459
|
-
os.remove(fallback)
|
|
507
|
+
self._remove_db_path(fallback)
|
|
460
508
|
self.db = self._open_db(fallback)
|
|
461
509
|
else:
|
|
462
510
|
self.db = self._open_db(path)
|
|
@@ -514,6 +562,41 @@ class GraphStore:
|
|
|
514
562
|
},
|
|
515
563
|
)
|
|
516
564
|
|
|
565
|
+
@staticmethod
def snapshot_to_read_replica() -> bool:
    """Copy the write DB to the read-replica path and signal readers.

    The read replica is used by the MCP daemon and all read-only CLI
    commands so they never contend with the write process's buffer pool.

    The copy is staged at ``<replica>.tmp`` and then moved into place with
    ``os.replace``. When both the staged copy and the existing replica are
    single files the swap is atomic. A directory replica has to be removed
    before the staged directory can be moved in, so in that case there is
    a short window in which no replica exists.

    Both database layouts (directory or single file) are handled for the
    source, the staging path and the previous replica, including the case
    where the layout changed between snapshots.

    Returns:
        True on success; False if the source DB does not exist or the
        copy failed (the failure is logged).
    """
    src = SETTINGS.db_path
    dst = SETTINGS.db_snapshot_path
    if not os.path.exists(src):
        return False
    tmp = dst + ".tmp"
    try:
        # A stale staging path may be a file or a directory; rmtree with
        # ignore_errors would silently leave a file behind.
        GraphStore._remove_db_path(tmp)
        if os.path.isdir(src):
            shutil.copytree(src, tmp)
        else:
            os.makedirs(os.path.dirname(dst) or ".", exist_ok=True)
            shutil.copy2(src, tmp)
        # Only file-over-file can be swapped in a single step; any
        # directory involvement requires clearing the destination first.
        if os.path.isdir(tmp) or os.path.isdir(dst):
            GraphStore._remove_db_path(dst)
        os.replace(tmp, dst)
        # Sentinel: MCP daemon watches this file's mtime to know when to reload.
        sentinel = dst + ".updated"
        with open(sentinel, "w", encoding="utf-8") as f:
            f.write(str(int(time.time())))
        return True
    except Exception as exc:
        LOGGER.warning("Snapshot to read replica failed: %s", exc)
        try:
            GraphStore._remove_db_path(tmp)
        except OSError:
            # Best-effort cleanup; the next snapshot clears the staging path.
            pass
        return False
|
|
599
|
+
|
|
517
600
|
def query_records(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
|
|
518
601
|
frame = self.execute(query, params or {}).get_as_df()
|
|
519
602
|
if frame.empty:
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json as _json_mod
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
4
6
|
import subprocess
|
|
5
7
|
import sys
|
|
6
8
|
import tempfile
|
|
@@ -8,6 +10,10 @@ import time
|
|
|
8
10
|
|
|
9
11
|
from fastmcp import FastMCP
|
|
10
12
|
|
|
13
|
+
from codespine.config import SETTINGS
|
|
14
|
+
|
|
15
|
+
_LOGGER = logging.getLogger(__name__)
|
|
16
|
+
|
|
11
17
|
from codespine import __version__
|
|
12
18
|
from codespine.analysis.community import detect_communities, symbol_community
|
|
13
19
|
from codespine.analysis.context import build_symbol_context
|
|
@@ -129,7 +135,47 @@ def _staleness_meta(
|
|
|
129
135
|
return _json(response)
|
|
130
136
|
|
|
131
137
|
|
|
138
|
+
class _StoreProxy:
    """Wrap a GraphStore and hot-reload it when the snapshot sentinel changes.

    The sentinel is ``SETTINGS.db_snapshot_path + ".updated"``. Every
    attribute that is not found on the proxy itself is forwarded to the
    wrapped store; before forwarding, the sentinel's mtime is compared with
    the last one seen, and if it is newer a fresh read-only GraphStore is
    swapped in so the MCP daemon picks up the new index without restarting.

    The proxy's own state (``_store``, ``_sentinel``, ``_last_mtime``) is
    written with ``object.__setattr__`` and read with
    ``object.__getattribute__`` so it never goes through the forwarding path.
    """

    def __init__(self, store) -> None:
        object.__setattr__(self, "_store", store)
        object.__setattr__(self, "_sentinel", SETTINGS.db_snapshot_path + ".updated")
        object.__setattr__(self, "_last_mtime", self._sentinel_mtime())

    def _sentinel_mtime(self) -> float:
        """Return the sentinel's mtime, or 0.0 when it cannot be read."""
        try:
            return os.path.getmtime(object.__getattribute__(self, "_sentinel"))
        except OSError:
            # Missing file, permission problem, unreadable parent, etc.: treat
            # as "no update" rather than failing every proxied attribute access.
            return 0.0

    def _maybe_reload(self) -> None:
        """Swap in a new read-only GraphStore if the sentinel is newer."""
        current = self._sentinel_mtime()
        if current > object.__getattribute__(self, "_last_mtime"):
            # Imported here rather than at module level.
            from codespine.db.store import GraphStore as _GS
            try:
                new_store = _GS(read_only=True)
                object.__setattr__(self, "_store", new_store)
                object.__setattr__(self, "_last_mtime", current)
                _LOGGER.info("MCP: hot-reloaded GraphStore from updated snapshot")
            except Exception as exc:
                # Keep serving the previous store; _last_mtime is left
                # unchanged so the reload is attempted again on next access.
                _LOGGER.warning("MCP: hot-reload failed: %s", exc)

    def __getattr__(self, name: str):
        # Only reached for names not found on the proxy itself.
        self._maybe_reload()
        return getattr(object.__getattribute__(self, "_store"), name)
|
|
175
|
+
|
|
176
|
+
|
|
132
177
|
def build_mcp_server(store, repo_path_provider):
|
|
178
|
+
store = _StoreProxy(store)
|
|
133
179
|
_raw_mcp = FastMCP("codespine")
|
|
134
180
|
overlay_store = getattr(store, "overlay_store", None)
|
|
135
181
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codespine
|
|
3
|
-
Version: 0.5.5
|
|
3
|
+
Version: 0.5.7
|
|
4
4
|
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
5
|
Author: CodeSpine contributors
|
|
6
6
|
License: MIT License
|
|
@@ -70,12 +70,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
|
|
|
70
70
|
|
|
71
71
|
It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
72
72
|
|
|
73
|
+
It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
|
|
74
|
+
|
|
73
75
|
## Why It Saves Tokens
|
|
74
76
|
|
|
75
77
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
76
78
|
- Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
|
|
77
79
|
- Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
|
|
78
80
|
- Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
|
|
81
|
+
- Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
|
|
79
82
|
|
|
80
83
|
## Install
|
|
81
84
|
|
|
@@ -100,6 +103,32 @@ pip install "codespine[ml]"
|
|
|
100
103
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
101
104
|
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
102
105
|
|
|
106
|
+
## Editing Without Stale Indexes
|
|
107
|
+
|
|
108
|
+
CodeSpine uses a two-layer model:
|
|
109
|
+
|
|
110
|
+
- Base index: last committed state
|
|
111
|
+
- Dirty overlay: uncommitted Java changes
|
|
112
|
+
|
|
113
|
+
Fast tools read merged `base + overlay` state by default:
|
|
114
|
+
|
|
115
|
+
- `search`
|
|
116
|
+
- `context`
|
|
117
|
+
- `impact`
|
|
118
|
+
- MCP `search_hybrid`
|
|
119
|
+
- MCP `find_symbol`
|
|
120
|
+
- MCP `get_symbol_context`
|
|
121
|
+
- MCP `get_impact`
|
|
122
|
+
|
|
123
|
+
Deep analyses stay committed-only until promotion:
|
|
124
|
+
|
|
125
|
+
- `deadcode`
|
|
126
|
+
- `flow`
|
|
127
|
+
- `community`
|
|
128
|
+
- `coupling`
|
|
129
|
+
|
|
130
|
+
`codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
|
|
131
|
+
|
|
103
132
|
## Quick Start
|
|
104
133
|
|
|
105
134
|
Index a repo:
|
|
@@ -205,6 +234,7 @@ codespine analyse <path> --full
|
|
|
205
234
|
codespine analyse <path> --deep
|
|
206
235
|
codespine analyse <path> --embed
|
|
207
236
|
codespine watch --path .
|
|
237
|
+
codespine watch --path . --overlay-debounce-ms 1500
|
|
208
238
|
codespine search "query"
|
|
209
239
|
codespine context "symbol"
|
|
210
240
|
codespine impact "symbol"
|
|
@@ -215,6 +245,9 @@ codespine coupling
|
|
|
215
245
|
codespine diff main..feature
|
|
216
246
|
codespine stats
|
|
217
247
|
codespine list
|
|
248
|
+
codespine overlay-status
|
|
249
|
+
codespine overlay-promote
|
|
250
|
+
codespine overlay-clear
|
|
218
251
|
codespine clear-project <project_id>
|
|
219
252
|
codespine clear-index
|
|
220
253
|
```
|
|
@@ -247,6 +280,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
|
|
|
247
280
|
|
|
248
281
|
Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
|
|
249
282
|
|
|
283
|
+
When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
|
|
284
|
+
|
|
250
285
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
251
286
|
|
|
252
287
|
## Runtime Files
|
|
@@ -256,10 +291,12 @@ Use it when you want architecture-level context. Skip it when you just need the
|
|
|
256
291
|
- `~/.codespine.log` - server log
|
|
257
292
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
258
293
|
- `~/.codespine_index_meta/` - incremental file metadata cache
|
|
294
|
+
- `~/.codespine_overlay/` - uncommitted dirty overlay state
|
|
259
295
|
|
|
260
296
|
## Notes
|
|
261
297
|
|
|
262
298
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
299
|
+
- `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
|
|
263
300
|
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
264
301
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
265
302
|
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
pytest.importorskip("kuzu")
|
|
6
|
+
|
|
7
|
+
from codespine.db.store import GraphStore
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_open_with_recovery_rebuilds_legacy_db(monkeypatch):
    """A version-mismatch on the first open triggers one removal and a reopen."""
    db_path = "/tmp/test-codespine-db"
    opened: list[str] = []
    removed: list[str] = []

    def fake_open(self, path: str):
        opened.append(path)
        # Only the very first attempt fails; the retry succeeds.
        if len(opened) == 1:
            raise RuntimeError("Storage version mismatch: unsupported database")
        return object()

    monkeypatch.setattr(GraphStore, "_open_db", fake_open)
    monkeypatch.setattr(GraphStore, "_remove_db_path", staticmethod(removed.append))

    # Bypass __post_init__ so no real database is touched.
    store = GraphStore.__new__(GraphStore)
    store.read_only = False
    store._tls = None

    db = store._open_with_recovery(db_path)

    assert db is not None
    assert opened == [db_path, db_path]
    assert removed == [db_path]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_open_with_recovery_does_not_remove_on_permission_error(monkeypatch):
    """An error without a recoverable marker propagates and deletes nothing."""
    db_path = "/tmp/test-codespine-db"
    removed: list[str] = []

    def fake_open(self, path: str):
        raise RuntimeError("Operation not permitted")

    monkeypatch.setattr(GraphStore, "_open_db", fake_open)
    monkeypatch.setattr(GraphStore, "_remove_db_path", staticmethod(removed.append))

    # Bypass __post_init__ so no real database is touched.
    store = GraphStore.__new__(GraphStore)
    store.read_only = False
    store._tls = None

    with pytest.raises(RuntimeError, match="Operation not permitted"):
        store._open_with_recovery(db_path)

    assert not removed
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|