codespine 0.5.4__tar.gz → 0.5.6__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (60)
  1. {codespine-0.5.4 → codespine-0.5.6}/PKG-INFO +38 -1
  2. {codespine-0.5.4 → codespine-0.5.6}/README.md +37 -0
  3. {codespine-0.5.4 → codespine-0.5.6}/codespine/__init__.py +1 -1
  4. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/impact.py +83 -41
  5. {codespine-0.5.4 → codespine-0.5.6}/codespine/cli.py +52 -4
  6. {codespine-0.5.4 → codespine-0.5.6}/codespine/config.py +4 -0
  7. {codespine-0.5.4 → codespine-0.5.6}/codespine/db/schema.py +5 -2
  8. {codespine-0.5.4 → codespine-0.5.6}/codespine/db/store.py +187 -12
  9. {codespine-0.5.4 → codespine-0.5.6}/codespine/indexer/engine.py +160 -68
  10. {codespine-0.5.4 → codespine-0.5.6}/codespine/mcp/server.py +154 -63
  11. codespine-0.5.6/codespine/overlay/__init__.py +23 -0
  12. codespine-0.5.6/codespine/overlay/git_state.py +35 -0
  13. codespine-0.5.6/codespine/overlay/merge.py +189 -0
  14. codespine-0.5.6/codespine/overlay/store.py +492 -0
  15. {codespine-0.5.4 → codespine-0.5.6}/codespine/search/hybrid.py +26 -23
  16. codespine-0.5.6/codespine/watch/watcher.py +261 -0
  17. {codespine-0.5.4 → codespine-0.5.6}/codespine.egg-info/PKG-INFO +38 -1
  18. {codespine-0.5.4 → codespine-0.5.6}/codespine.egg-info/SOURCES.txt +7 -1
  19. {codespine-0.5.4 → codespine-0.5.6}/pyproject.toml +1 -1
  20. codespine-0.5.6/tests/test_overlay.py +231 -0
  21. codespine-0.5.6/tests/test_store_recovery.py +52 -0
  22. codespine-0.5.4/codespine/watch/watcher.py +0 -75
  23. {codespine-0.5.4 → codespine-0.5.6}/LICENSE +0 -0
  24. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/__init__.py +0 -0
  25. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/community.py +0 -0
  26. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/context.py +0 -0
  27. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/coupling.py +0 -0
  28. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/crossmodule.py +0 -0
  29. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/deadcode.py +0 -0
  30. {codespine-0.5.4 → codespine-0.5.6}/codespine/analysis/flow.py +0 -0
  31. {codespine-0.5.4 → codespine-0.5.6}/codespine/db/__init__.py +0 -0
  32. {codespine-0.5.4 → codespine-0.5.6}/codespine/diff/__init__.py +0 -0
  33. {codespine-0.5.4 → codespine-0.5.6}/codespine/diff/branch_diff.py +0 -0
  34. {codespine-0.5.4 → codespine-0.5.6}/codespine/indexer/__init__.py +0 -0
  35. {codespine-0.5.4 → codespine-0.5.6}/codespine/indexer/call_resolver.py +0 -0
  36. {codespine-0.5.4 → codespine-0.5.6}/codespine/indexer/java_parser.py +0 -0
  37. {codespine-0.5.4 → codespine-0.5.6}/codespine/indexer/symbol_builder.py +0 -0
  38. {codespine-0.5.4 → codespine-0.5.6}/codespine/mcp/__init__.py +0 -0
  39. {codespine-0.5.4 → codespine-0.5.6}/codespine/noise/__init__.py +0 -0
  40. {codespine-0.5.4 → codespine-0.5.6}/codespine/noise/blocklist.py +0 -0
  41. {codespine-0.5.4 → codespine-0.5.6}/codespine/search/__init__.py +0 -0
  42. {codespine-0.5.4 → codespine-0.5.6}/codespine/search/bm25.py +0 -0
  43. {codespine-0.5.4 → codespine-0.5.6}/codespine/search/fuzzy.py +0 -0
  44. {codespine-0.5.4 → codespine-0.5.6}/codespine/search/rrf.py +0 -0
  45. {codespine-0.5.4 → codespine-0.5.6}/codespine/search/vector.py +0 -0
  46. {codespine-0.5.4 → codespine-0.5.6}/codespine/watch/__init__.py +0 -0
  47. {codespine-0.5.4 → codespine-0.5.6}/codespine.egg-info/dependency_links.txt +0 -0
  48. {codespine-0.5.4 → codespine-0.5.6}/codespine.egg-info/entry_points.txt +0 -0
  49. {codespine-0.5.4 → codespine-0.5.6}/codespine.egg-info/requires.txt +0 -0
  50. {codespine-0.5.4 → codespine-0.5.6}/codespine.egg-info/top_level.txt +0 -0
  51. {codespine-0.5.4 → codespine-0.5.6}/gindex.py +0 -0
  52. {codespine-0.5.4 → codespine-0.5.6}/setup.cfg +0 -0
  53. {codespine-0.5.4 → codespine-0.5.6}/tests/test_branch_diff_normalize.py +0 -0
  54. {codespine-0.5.4 → codespine-0.5.6}/tests/test_call_resolver.py +0 -0
  55. {codespine-0.5.4 → codespine-0.5.6}/tests/test_community_detection.py +0 -0
  56. {codespine-0.5.4 → codespine-0.5.6}/tests/test_deadcode.py +0 -0
  57. {codespine-0.5.4 → codespine-0.5.6}/tests/test_index_and_hybrid.py +0 -0
  58. {codespine-0.5.4 → codespine-0.5.6}/tests/test_java_parser.py +0 -0
  59. {codespine-0.5.4 → codespine-0.5.6}/tests/test_multimodule_index.py +0 -0
  60. {codespine-0.5.4 → codespine-0.5.6}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.4
3
+ Version: 0.5.6
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -70,12 +70,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
70
70
 
71
71
  It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
72
72
 
73
+ It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
74
+
73
75
  ## Why It Saves Tokens
74
76
 
75
77
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
76
78
  - Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
77
79
  - Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
78
80
  - Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
81
+ - Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
79
82
 
80
83
  ## Install
81
84
 
@@ -100,6 +103,32 @@ pip install "codespine[ml]"
100
103
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
101
104
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
102
105
 
106
+ ## Editing Without Stale Indexes
107
+
108
+ CodeSpine uses a two-layer model:
109
+
110
+ - Base index: last committed state
111
+ - Dirty overlay: uncommitted Java changes
112
+
113
+ Fast tools read merged `base + overlay` state by default:
114
+
115
+ - `search`
116
+ - `context`
117
+ - `impact`
118
+ - MCP `search_hybrid`
119
+ - MCP `find_symbol`
120
+ - MCP `get_symbol_context`
121
+ - MCP `get_impact`
122
+
123
+ Deep analyses stay committed-only until promotion:
124
+
125
+ - `deadcode`
126
+ - `flow`
127
+ - `community`
128
+ - `coupling`
129
+
130
+ `codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
131
+
103
132
  ## Quick Start
104
133
 
105
134
  Index a repo:
@@ -205,6 +234,7 @@ codespine analyse <path> --full
205
234
  codespine analyse <path> --deep
206
235
  codespine analyse <path> --embed
207
236
  codespine watch --path .
237
+ codespine watch --path . --overlay-debounce-ms 1500
208
238
  codespine search "query"
209
239
  codespine context "symbol"
210
240
  codespine impact "symbol"
@@ -215,6 +245,9 @@ codespine coupling
215
245
  codespine diff main..feature
216
246
  codespine stats
217
247
  codespine list
248
+ codespine overlay-status
249
+ codespine overlay-promote
250
+ codespine overlay-clear
218
251
  codespine clear-project <project_id>
219
252
  codespine clear-index
220
253
  ```
@@ -247,6 +280,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
247
280
 
248
281
  Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
249
282
 
283
+ When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
284
+
250
285
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
251
286
 
252
287
  ## Runtime Files
@@ -256,10 +291,12 @@ Use it when you want architecture-level context. Skip it when you just need the
256
291
  - `~/.codespine.log` - server log
257
292
  - `~/.codespine_embedding_cache.json` - embedding cache
258
293
  - `~/.codespine_index_meta/` - incremental file metadata cache
294
+ - `~/.codespine_overlay/` - uncommitted dirty overlay state
259
295
 
260
296
  ## Notes
261
297
 
262
298
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
299
+ - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
263
300
  - `codespine clear-index` rebuilds the local index database from scratch.
264
301
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
265
302
 
@@ -6,12 +6,15 @@ Instead of having an agent open dozens of `.java` files to answer one question,
6
6
 
7
7
  It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
8
8
 
9
+ It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
10
+
9
11
  ## Why It Saves Tokens
10
12
 
11
13
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
12
14
  - Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
13
15
  - Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
14
16
  - Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
17
+ - Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
15
18
 
16
19
  ## Install
17
20
 
@@ -36,6 +39,32 @@ pip install "codespine[ml]"
36
39
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
37
40
  - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
38
41
 
42
+ ## Editing Without Stale Indexes
43
+
44
+ CodeSpine uses a two-layer model:
45
+
46
+ - Base index: last committed state
47
+ - Dirty overlay: uncommitted Java changes
48
+
49
+ Fast tools read merged `base + overlay` state by default:
50
+
51
+ - `search`
52
+ - `context`
53
+ - `impact`
54
+ - MCP `search_hybrid`
55
+ - MCP `find_symbol`
56
+ - MCP `get_symbol_context`
57
+ - MCP `get_impact`
58
+
59
+ Deep analyses stay committed-only until promotion:
60
+
61
+ - `deadcode`
62
+ - `flow`
63
+ - `community`
64
+ - `coupling`
65
+
66
+ `codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
67
+
39
68
  ## Quick Start
40
69
 
41
70
  Index a repo:
@@ -141,6 +170,7 @@ codespine analyse <path> --full
141
170
  codespine analyse <path> --deep
142
171
  codespine analyse <path> --embed
143
172
  codespine watch --path .
173
+ codespine watch --path . --overlay-debounce-ms 1500
144
174
  codespine search "query"
145
175
  codespine context "symbol"
146
176
  codespine impact "symbol"
@@ -151,6 +181,9 @@ codespine coupling
151
181
  codespine diff main..feature
152
182
  codespine stats
153
183
  codespine list
184
+ codespine overlay-status
185
+ codespine overlay-promote
186
+ codespine overlay-clear
154
187
  codespine clear-project <project_id>
155
188
  codespine clear-index
156
189
  ```
@@ -183,6 +216,8 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
183
216
 
184
217
  Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
185
218
 
219
+ When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
220
+
186
221
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
187
222
 
188
223
  ## Runtime Files
@@ -192,10 +227,12 @@ Use it when you want architecture-level context. Skip it when you just need the
192
227
  - `~/.codespine.log` - server log
193
228
  - `~/.codespine_embedding_cache.json` - embedding cache
194
229
  - `~/.codespine_index_meta/` - incremental file metadata cache
230
+ - `~/.codespine_overlay/` - uncommitted dirty overlay state
195
231
 
196
232
  ## Notes
197
233
 
198
234
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
235
+ - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
199
236
  - `codespine clear-index` rebuilds the local index database from scratch.
200
237
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
201
238
 
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.5.4"
4
+ __version__ = "0.5.6"
@@ -2,22 +2,36 @@ from __future__ import annotations
2
2
 
3
3
  from collections import defaultdict, deque
4
4
 
5
+ from codespine.overlay.merge import merged_call_edges, merged_method_records, merged_symbol_records
6
+
5
7
 
6
8
  def _resolve_symbol_ids(store, symbol_query: str, project: str | None = None) -> list[str]:
7
- project_clause = "AND f.project_id = $proj" if project else ""
8
- params: dict = {"q": symbol_query}
9
- if project:
10
- params["proj"] = project
11
- recs = store.query_records(
12
- f"""
13
- MATCH (s:Symbol), (f:File)
14
- WHERE s.file_id = f.id {project_clause}
15
- AND (s.id = $q OR lower(s.name) = lower($q) OR lower(s.fqname) = lower($q) OR lower(s.fqname) CONTAINS lower($q))
16
- RETURN s.id as id
17
- LIMIT 50
18
- """,
19
- params,
20
- )
9
+ overlay_store = getattr(store, "overlay_store", None)
10
+ if overlay_store is not None:
11
+ recs = []
12
+ needle = symbol_query.lower()
13
+ for rec in merged_symbol_records(store, overlay_store, project=project):
14
+ name = str(rec.get("name") or "").lower()
15
+ fqname = str(rec.get("fqname") or "").lower()
16
+ if rec.get("id") == symbol_query or name == needle or fqname == needle or needle in fqname:
17
+ recs.append({"id": rec["id"]})
18
+ if len(recs) >= 50:
19
+ break
20
+ else:
21
+ project_clause = "AND f.project_id = $proj" if project else ""
22
+ params: dict = {"q": symbol_query}
23
+ if project:
24
+ params["proj"] = project
25
+ recs = store.query_records(
26
+ f"""
27
+ MATCH (s:Symbol), (f:File)
28
+ WHERE s.file_id = f.id {project_clause}
29
+ AND (s.id = $q OR lower(s.name) = lower($q) OR lower(s.fqname) = lower($q) OR lower(s.fqname) CONTAINS lower($q))
30
+ RETURN s.id as id
31
+ LIMIT 50
32
+ """,
33
+ params,
34
+ )
21
35
  return [r["id"] for r in recs]
22
36
 
23
37
 
@@ -30,15 +44,21 @@ def _resolve_method_metadata(store, method_ids: list[str]) -> dict[str, dict]:
30
44
  """
31
45
  if not method_ids:
32
46
  return {}
33
- recs = store.query_records(
34
- """
35
- MATCH (m:Method), (c:Class), (f:File)
36
- WHERE m.id IN $ids AND m.class_id = c.id AND c.file_id = f.id
37
- RETURN m.id as id, m.name as name, m.signature as fqname,
38
- c.fqcn as class_fqcn, f.path as file_path, f.project_id as project_id
39
- """,
40
- {"ids": method_ids},
41
- )
47
+ overlay_store = getattr(store, "overlay_store", None)
48
+ if overlay_store is not None:
49
+ recs = [r for r in merged_method_records(store, overlay_store) if r.get("id") in set(method_ids)]
50
+ for rec in recs:
51
+ rec["fqname"] = rec.get("signature")
52
+ else:
53
+ recs = store.query_records(
54
+ """
55
+ MATCH (m:Method), (c:Class), (f:File)
56
+ WHERE m.id IN $ids AND m.class_id = c.id AND c.file_id = f.id
57
+ RETURN m.id as id, m.name as name, m.signature as fqname,
58
+ c.fqcn as class_fqcn, f.path as file_path, f.project_id as project_id
59
+ """,
60
+ {"ids": method_ids},
61
+ )
42
62
  return {r["id"]: r for r in recs}
43
63
 
44
64
 
@@ -47,16 +67,33 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
47
67
  if not target_symbol_ids:
48
68
  return {"target": symbol_query, "depth_groups": {"1": [], "2": [], "3+": []}}
49
69
 
50
- symbol_to_method = {
51
- r["sid"]: r["mid"]
52
- for r in store.query_records(
53
- """
54
- MATCH (s:Symbol),(m:Method)
55
- WHERE s.kind = 'method' AND s.fqname CONTAINS m.signature
56
- RETURN s.id as sid, m.id as mid
57
- """
58
- )
59
- }
70
+ overlay_store = getattr(store, "overlay_store", None)
71
+ if overlay_store is not None:
72
+ methods = merged_method_records(store, overlay_store, project=project)
73
+ symbols = merged_symbol_records(store, overlay_store, project=project)
74
+ fqname_and_file_to_method = {
75
+ (f"{rec.get('class_fqcn')}#{rec.get('signature')}", rec.get("file_id")): rec["id"]
76
+ for rec in methods
77
+ }
78
+ symbol_to_method = {}
79
+ for rec in symbols:
80
+ if rec.get("kind") != "method":
81
+ continue
82
+ method_key = (rec.get("fqname"), rec.get("file_id"))
83
+ method_id = fqname_and_file_to_method.get(method_key)
84
+ if method_id:
85
+ symbol_to_method[rec["id"]] = method_id
86
+ else:
87
+ symbol_to_method = {
88
+ r["sid"]: r["mid"]
89
+ for r in store.query_records(
90
+ """
91
+ MATCH (s:Symbol),(m:Method)
92
+ WHERE s.kind = 'method' AND s.fqname CONTAINS m.signature
93
+ RETURN s.id as sid, m.id as mid
94
+ """
95
+ )
96
+ }
60
97
 
61
98
  target_method_ids = [symbol_to_method[sid] for sid in target_symbol_ids if sid in symbol_to_method]
62
99
  if not target_method_ids:
@@ -64,14 +101,19 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
64
101
 
65
102
  # Load all call edges – cross-project callers are included intentionally so
66
103
  # impact analysis surfaces inter-module dependencies.
67
- edges = store.query_records(
68
- """
69
- MATCH (a:Method)-[r:CALLS]->(b:Method)
70
- RETURN a.id as src, b.id as dst, 'CALLS' as edge_type,
71
- coalesce(r.confidence, 0.5) as confidence,
72
- coalesce(r.reason, 'unknown') as reason
73
- """
74
- )
104
+ if overlay_store is not None:
105
+ edges = merged_call_edges(store, overlay_store, project=project)
106
+ for edge in edges:
107
+ edge["edge_type"] = "CALLS"
108
+ else:
109
+ edges = store.query_records(
110
+ """
111
+ MATCH (a:Method)-[r:CALLS]->(b:Method)
112
+ RETURN a.id as src, b.id as dst, 'CALLS' as edge_type,
113
+ coalesce(r.confidence, 0.5) as confidence,
114
+ coalesce(r.reason, 'unknown') as reason
115
+ """
116
+ )
75
117
 
76
118
  reverse_adj: dict[str, list[dict]] = defaultdict(list)
77
119
  for edge in edges:
@@ -24,7 +24,7 @@ from codespine.diff.branch_diff import compare_branches
24
24
  from codespine.indexer.engine import JavaIndexer
25
25
  from codespine.mcp.server import build_mcp_server
26
26
  from codespine.search.hybrid import hybrid_search
27
- from codespine.watch.watcher import run_watch_mode
27
+ from codespine.watch.watcher import clear_overlay, get_overlay_status, promote_overlay, run_watch_mode
28
28
 
29
29
  logging.basicConfig(filename=SETTINGS.log_file, level=logging.INFO)
30
30
  LOGGER = logging.getLogger(__name__)
@@ -414,10 +414,23 @@ def coupling(months: int, min_strength: float, min_cochanges: int, as_json: bool
414
414
  @main.command()
415
415
  @click.option("--path", default=".", show_default=True, type=click.Path(exists=True))
416
416
  @click.option("--global-interval", default=30, show_default=True, type=int)
417
- def watch(path: str, global_interval: int) -> None:
417
+ @click.option(
418
+ "--overlay-debounce-ms",
419
+ default=SETTINGS.default_overlay_debounce_ms,
420
+ show_default=True,
421
+ type=int,
422
+ )
423
+ @click.option("--promote-on-commit/--no-promote-on-commit", default=True, show_default=True)
424
+ def watch(path: str, global_interval: int, overlay_debounce_ms: int, promote_on_commit: bool) -> None:
418
425
  """Live re-indexing and periodic global analysis refresh."""
419
426
  store = GraphStore(read_only=False)
420
- run_watch_mode(store, os.path.abspath(path), global_interval=global_interval)
427
+ run_watch_mode(
428
+ store,
429
+ os.path.abspath(path),
430
+ global_interval=global_interval,
431
+ overlay_debounce_ms=overlay_debounce_ms,
432
+ promote_on_commit=promote_on_commit,
433
+ )
421
434
 
422
435
 
423
436
  @main.command()
@@ -521,6 +534,8 @@ def status(as_json: bool) -> None:
521
534
  pid = int(f.read().strip())
522
535
  except Exception:
523
536
  pid = None
537
+ store = GraphStore(read_only=True)
538
+ overlay = get_overlay_status(store)
524
539
  payload = {
525
540
  "running": running,
526
541
  "pid": pid,
@@ -528,10 +543,41 @@ def status(as_json: bool) -> None:
528
543
  "db_path": SETTINGS.db_path,
529
544
  "db_size_bytes": _db_size_bytes(SETTINGS.db_path),
530
545
  "log_file": SETTINGS.log_file,
546
+ "overlay_dir": SETTINGS.overlay_dir,
547
+ "overlay_projects": overlay,
531
548
  }
532
549
  _echo_json(payload, as_json)
533
550
 
534
551
 
552
+ @main.command("overlay-status")
553
+ @click.option("--project", default=None)
554
+ @click.option("--json", "as_json", is_flag=True)
555
+ def overlay_status_cmd(project: str | None, as_json: bool) -> None:
556
+ """Show dirty overlay status by project/module."""
557
+ store = GraphStore(read_only=True)
558
+ _echo_json(get_overlay_status(store, project=project), as_json)
559
+
560
+
561
+ @main.command("overlay-clear")
562
+ @click.option("--project", default=None)
563
+ @click.option("--json", "as_json", is_flag=True)
564
+ def overlay_clear_cmd(project: str | None, as_json: bool) -> None:
565
+ """Clear dirty overlay data without touching the committed base index."""
566
+ store = GraphStore(read_only=False)
567
+ result = {"cleared": clear_overlay(store, project=project)}
568
+ _echo_json(result, as_json)
569
+
570
+
571
+ @main.command("overlay-promote")
572
+ @click.option("--project", default=None)
573
+ @click.option("--json", "as_json", is_flag=True)
574
+ def overlay_promote_cmd(project: str | None, as_json: bool) -> None:
575
+ """Promote dirty overlay changes into the committed base index now."""
576
+ store = GraphStore(read_only=False)
577
+ result = {"promoted": promote_overlay(store, project=project, require_head_change=False)}
578
+ _echo_json(result, as_json)
579
+
580
+
535
581
  @main.command()
536
582
  @click.argument("query")
537
583
  @click.option("--json", "as_json", is_flag=True)
@@ -552,7 +598,7 @@ def clean(force: bool) -> None:
552
598
  if not force and not click.confirm("Remove local CodeSpine DB, PID, and logs?"):
553
599
  click.echo("Aborted.")
554
600
  return
555
- for path in [SETTINGS.pid_file, SETTINGS.log_file, SETTINGS.db_path]:
601
+ for path in [SETTINGS.pid_file, SETTINGS.log_file, SETTINGS.db_path, SETTINGS.overlay_dir]:
556
602
  if not os.path.exists(path):
557
603
  continue
558
604
  if os.path.isdir(path):
@@ -591,6 +637,7 @@ def clear_project_cmd(project_id: str, allow_running: bool) -> None:
591
637
  project_path = recs[0].get("path", "")
592
638
  store.clear_analysis_artifacts()
593
639
  store.clear_project(project_id)
640
+ store.overlay_store.clear_project(project_id)
594
641
  meta_path = JavaIndexer._meta_cache_path(project_id)
595
642
  if os.path.exists(meta_path):
596
643
  try:
@@ -615,6 +662,7 @@ def clear_index_cmd(allow_running: bool) -> None:
615
662
  store = GraphStore(read_only=False)
616
663
  projects = store.query_records("MATCH (p:Project) RETURN p.id as id")
617
664
  store.rebuild_empty_db()
665
+ store.overlay_store.clear_all()
618
666
  for p in projects:
619
667
  meta_path = JavaIndexer._meta_cache_path(p["id"])
620
668
  if os.path.exists(meta_path):
@@ -9,15 +9,19 @@ class Settings:
9
9
  log_file: str = os.path.expanduser("~/.codespine.log")
10
10
  embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")
11
11
  index_meta_dir: str = os.path.expanduser("~/.codespine_index_meta")
12
+ overlay_dir: str = os.path.expanduser("~/.codespine_overlay")
12
13
  embedding_model: str = "BAAI/bge-small-en-v1.5"
13
14
  vector_dim: int = 384
14
15
  rrf_k: int = 60
15
16
  semantic_candidate_pool: int = 2000
16
17
  write_batch_size: int = 500
18
+ index_file_batch_size: int = 64
19
+ edge_write_batch_size: int = 2000
17
20
  default_coupling_months: int = 6
18
21
  default_min_coupling_strength: float = 0.3
19
22
  default_min_cochanges: int = 3
20
23
  default_global_interval_s: int = 30
24
+ default_overlay_debounce_ms: int = 1500
21
25
 
22
26
 
23
27
  SETTINGS = Settings()
@@ -10,7 +10,7 @@ NODE_TABLES: list[tuple[str, str]] = [
10
10
  ("SchemaMeta", "CREATE NODE TABLE SchemaMeta(key STRING, value STRING, PRIMARY KEY (key))"),
11
11
  (
12
12
  "Project",
13
- "CREATE NODE TABLE Project(id STRING, path STRING, language STRING, indexed_at STRING, PRIMARY KEY (id))",
13
+ "CREATE NODE TABLE Project(id STRING, path STRING, language STRING, indexed_at STRING, indexed_commit STRING, overlay_dirty BOOL, PRIMARY KEY (id))",
14
14
  ),
15
15
  (
16
16
  "File",
@@ -81,5 +81,8 @@ def ensure_schema(conn) -> None:
81
81
 
82
82
  _safe_execute(
83
83
  conn,
84
- "MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '3'",
84
+ "MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '4'",
85
85
  )
86
+
87
+ _safe_execute(conn, "ALTER TABLE Project ADD indexed_commit STRING DEFAULT ''")
88
+ _safe_execute(conn, "ALTER TABLE Project ADD overlay_dirty BOOL DEFAULT false")