codespine 0.9.8__tar.gz → 1.0.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. codespine-1.0.0/PKG-INFO +647 -0
  2. codespine-1.0.0/README.md +582 -0
  3. {codespine-0.9.8 → codespine-1.0.0}/codespine/__init__.py +1 -1
  4. codespine-1.0.0/codespine/cache/__init__.py +4 -0
  5. codespine-1.0.0/codespine/cache/result_cache.py +167 -0
  6. {codespine-0.9.8 → codespine-1.0.0}/codespine/cli.py +39 -3
  7. {codespine-0.9.8 → codespine-1.0.0}/codespine/config.py +1 -1
  8. codespine-1.0.0/codespine/db/_cypher_compat.py +309 -0
  9. {codespine-0.9.8 → codespine-1.0.0}/codespine/db/duckdb_store.py +26 -3
  10. {codespine-0.9.8 → codespine-1.0.0}/codespine/mcp/server.py +230 -5
  11. {codespine-0.9.8 → codespine-1.0.0}/codespine/search/vector.py +21 -4
  12. codespine-1.0.0/codespine.egg-info/PKG-INFO +647 -0
  13. {codespine-0.9.8 → codespine-1.0.0}/codespine.egg-info/SOURCES.txt +5 -0
  14. {codespine-0.9.8 → codespine-1.0.0}/codespine.egg-info/requires.txt +1 -4
  15. {codespine-0.9.8 → codespine-1.0.0}/pyproject.toml +3 -6
  16. codespine-1.0.0/tests/test_cypher_compat.py +303 -0
  17. codespine-1.0.0/tests/test_result_cache.py +179 -0
  18. codespine-0.9.8/PKG-INFO +0 -481
  19. codespine-0.9.8/README.md +0 -414
  20. codespine-0.9.8/codespine.egg-info/PKG-INFO +0 -481
  21. {codespine-0.9.8 → codespine-1.0.0}/LICENSE +0 -0
  22. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/__init__.py +0 -0
  23. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/community.py +0 -0
  24. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/context.py +0 -0
  25. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/coupling.py +0 -0
  26. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/crossmodule.py +0 -0
  27. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/deadcode.py +0 -0
  28. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/flow.py +0 -0
  29. {codespine-0.9.8 → codespine-1.0.0}/codespine/analysis/impact.py +0 -0
  30. {codespine-0.9.8 → codespine-1.0.0}/codespine/db/__init__.py +0 -0
  31. {codespine-0.9.8 → codespine-1.0.0}/codespine/db/schema.py +0 -0
  32. {codespine-0.9.8 → codespine-1.0.0}/codespine/db/store.py +0 -0
  33. {codespine-0.9.8 → codespine-1.0.0}/codespine/diff/__init__.py +0 -0
  34. {codespine-0.9.8 → codespine-1.0.0}/codespine/diff/branch_diff.py +0 -0
  35. {codespine-0.9.8 → codespine-1.0.0}/codespine/guide.py +0 -0
  36. {codespine-0.9.8 → codespine-1.0.0}/codespine/indexer/__init__.py +0 -0
  37. {codespine-0.9.8 → codespine-1.0.0}/codespine/indexer/call_resolver.py +0 -0
  38. {codespine-0.9.8 → codespine-1.0.0}/codespine/indexer/di_resolver.py +0 -0
  39. {codespine-0.9.8 → codespine-1.0.0}/codespine/indexer/engine.py +0 -0
  40. {codespine-0.9.8 → codespine-1.0.0}/codespine/indexer/java_parser.py +0 -0
  41. {codespine-0.9.8 → codespine-1.0.0}/codespine/indexer/symbol_builder.py +0 -0
  42. {codespine-0.9.8 → codespine-1.0.0}/codespine/mcp/__init__.py +0 -0
  43. {codespine-0.9.8 → codespine-1.0.0}/codespine/noise/__init__.py +0 -0
  44. {codespine-0.9.8 → codespine-1.0.0}/codespine/noise/blocklist.py +0 -0
  45. {codespine-0.9.8 → codespine-1.0.0}/codespine/overlay/__init__.py +0 -0
  46. {codespine-0.9.8 → codespine-1.0.0}/codespine/overlay/git_state.py +0 -0
  47. {codespine-0.9.8 → codespine-1.0.0}/codespine/overlay/merge.py +0 -0
  48. {codespine-0.9.8 → codespine-1.0.0}/codespine/overlay/store.py +0 -0
  49. {codespine-0.9.8 → codespine-1.0.0}/codespine/search/__init__.py +0 -0
  50. {codespine-0.9.8 → codespine-1.0.0}/codespine/search/bm25.py +0 -0
  51. {codespine-0.9.8 → codespine-1.0.0}/codespine/search/fuzzy.py +0 -0
  52. {codespine-0.9.8 → codespine-1.0.0}/codespine/search/hybrid.py +0 -0
  53. {codespine-0.9.8 → codespine-1.0.0}/codespine/search/rrf.py +0 -0
  54. {codespine-0.9.8 → codespine-1.0.0}/codespine/sharding/__init__.py +0 -0
  55. {codespine-0.9.8 → codespine-1.0.0}/codespine/sharding/router.py +0 -0
  56. {codespine-0.9.8 → codespine-1.0.0}/codespine/sharding/store.py +0 -0
  57. {codespine-0.9.8 → codespine-1.0.0}/codespine/watch/__init__.py +0 -0
  58. {codespine-0.9.8 → codespine-1.0.0}/codespine/watch/git_hook.py +0 -0
  59. {codespine-0.9.8 → codespine-1.0.0}/codespine/watch/watcher.py +0 -0
  60. {codespine-0.9.8 → codespine-1.0.0}/codespine.egg-info/dependency_links.txt +0 -0
  61. {codespine-0.9.8 → codespine-1.0.0}/codespine.egg-info/entry_points.txt +0 -0
  62. {codespine-0.9.8 → codespine-1.0.0}/codespine.egg-info/top_level.txt +0 -0
  63. {codespine-0.9.8 → codespine-1.0.0}/gindex.py +0 -0
  64. {codespine-0.9.8 → codespine-1.0.0}/setup.cfg +0 -0
  65. {codespine-0.9.8 → codespine-1.0.0}/tests/test_branch_diff_normalize.py +0 -0
  66. {codespine-0.9.8 → codespine-1.0.0}/tests/test_call_resolver.py +0 -0
  67. {codespine-0.9.8 → codespine-1.0.0}/tests/test_community_detection.py +0 -0
  68. {codespine-0.9.8 → codespine-1.0.0}/tests/test_deadcode.py +0 -0
  69. {codespine-0.9.8 → codespine-1.0.0}/tests/test_duckdb_store.py +0 -0
  70. {codespine-0.9.8 → codespine-1.0.0}/tests/test_index_and_hybrid.py +0 -0
  71. {codespine-0.9.8 → codespine-1.0.0}/tests/test_java_parser.py +0 -0
  72. {codespine-0.9.8 → codespine-1.0.0}/tests/test_multimodule_index.py +0 -0
  73. {codespine-0.9.8 → codespine-1.0.0}/tests/test_overlay.py +0 -0
  74. {codespine-0.9.8 → codespine-1.0.0}/tests/test_search_ranking.py +0 -0
  75. {codespine-0.9.8 → codespine-1.0.0}/tests/test_sharding.py +0 -0
  76. {codespine-0.9.8 → codespine-1.0.0}/tests/test_store_recovery.py +0 -0
@@ -0,0 +1,647 @@
1
+ Metadata-Version: 2.4
2
+ Name: codespine
3
+ Version: 1.0.0
4
+ Summary: Local Java code intelligence indexer backed by a graph database
5
+ Author: CodeSpine contributors
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 CodeSpine contributors
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/vinayak3022/codeSpine
29
+ Project-URL: Repository, https://github.com/vinayak3022/codeSpine
30
+ Project-URL: Issues, https://github.com/vinayak3022/codeSpine/issues
31
+ Keywords: java,code-indexing,graph,kuzu,mcp
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.10
37
+ Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Programming Language :: Python :: 3.13
40
+ Classifier: Topic :: Software Development :: Libraries
41
+ Classifier: Topic :: Software Development :: Quality Assurance
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: click
46
+ Requires-Dist: duckdb>=0.10.0
47
+ Requires-Dist: kuzu
48
+ Requires-Dist: tree-sitter
49
+ Requires-Dist: tree-sitter-java
50
+ Requires-Dist: fastmcp>=2.3.0
51
+ Requires-Dist: psutil
52
+ Requires-Dist: watchfiles
53
+ Provides-Extra: ml
54
+ Requires-Dist: sentence-transformers; extra == "ml"
55
+ Requires-Dist: numpy; extra == "ml"
56
+ Provides-Extra: community
57
+ Requires-Dist: igraph; extra == "community"
58
+ Requires-Dist: leidenalg; extra == "community"
59
+ Provides-Extra: full
60
+ Requires-Dist: sentence-transformers; extra == "full"
61
+ Requires-Dist: numpy; extra == "full"
62
+ Requires-Dist: igraph; extra == "full"
63
+ Requires-Dist: leidenalg; extra == "full"
64
+ Dynamic: license-file
65
+
66
+ # CodeSpine
67
+
68
+ **v1.0.0** — Local Java code intelligence for coding agents, backed by a graph database.
69
+
70
+ CodeSpine cuts token burn for coding agents working on Java codebases.
71
+
72
+ Instead of having an agent open dozens of `.java` files to answer one question, CodeSpine indexes the codebase once and serves the structure over MCP. The agent asks for symbols, callers, impact, flows, dead code, and module boundaries directly — fewer file reads, fewer wasted context windows, fewer hallucinated code paths.
73
+
74
+ It indexes classes, methods, calls, type relationships, DI bindings, cross-module links, git coupling, dead-code candidates, and execution flows so agents work from graph answers first and source files second.
75
+
76
+ File changes are written directly to the graph and are immediately queryable — no stale overlay merging, no OOM accumulation. The MCP daemon reloads from an atomic read replica the moment indexing or watch mode completes a batch.
77
+
78
+ ---
79
+
80
+ ## Why It Saves Tokens
81
+
82
+ - **One MCP call replaces many file opens.** `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
83
+ - **Search is structure-aware.** Ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
84
+ - **DI bindings are first-class.** `@Inject`, `@Autowired`, `@Bean`, and `@Provides` edges are resolved and included in impact analysis — Spring and Guice consumers are never missed.
85
+ - **Multi-module repos stay scoped.** Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
86
+ - **Repeat sessions get cheaper.** Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
87
+ - **Active edits are visible immediately.** Watch mode writes changes directly to the graph (not a slow overlay), so every MCP query reflects the latest file save.
88
+ - **Natural language dispatch.** `ask("what breaks if I change PaymentService?")` routes to the right tool automatically, reducing agent planning overhead.
89
+
90
+ ---
91
+
92
+ ## Install
93
+
94
+ ```bash
95
+ pip install codespine
96
+ ```
97
+
98
+ Optional semantic search (sentence-transformers):
99
+
100
+ ```bash
101
+ pip install "codespine[ml]"
102
+ ```
103
+
104
+ Everything at once (ml + community detection):
105
+
106
+ ```bash
107
+ pip install "codespine[full]"
108
+ ```
109
+
110
+ ### One-time model download (for semantic search)
111
+
112
+ ```bash
113
+ codespine install-model
114
+ ```
115
+
116
+ Downloads and caches the embedding model. Only needed once. After this, `--embed` works without any network access.
117
+
118
+ ---
119
+
120
+ ## Quick Start
121
+
122
+ ```bash
123
+ # 1. Index a project
124
+ codespine analyse /path/to/java-project
125
+
126
+ # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
127
+ # Auto-enabled for repos with ≤ 3,000 files; use --deep to force on larger repos.
128
+ codespine analyse /path/to/java-project --deep
129
+
130
+ # 3. (Optional) Add semantic embeddings for concept-level search
131
+ codespine analyse /path/to/java-project --embed
132
+
133
+ # 4. Start MCP server (foreground; your IDE manages the process)
134
+ codespine mcp
135
+ ```
136
+
137
+ Typical output:
138
+
139
+ ```
140
+ $ codespine analyse .
141
+ Walking files... 142 files found
142
+ Index mode... incremental (8 files to index, 0 deleted)
143
+ Parsing code... 8/8
144
+ Tracing calls... 847 calls resolved
145
+ Analyzing DI bindings... 63 INJECTS edges, 14 BINDS_INTERFACE edges
146
+ Analyzing types... 234 type relationships
147
+ Cross-module linking... skipped (single module)
148
+ Detecting communities... 8 clusters found
149
+ Detecting execution flows... 34 processes found
150
+ Finding dead code... 12 unreachable symbols
151
+ Analyzing git history... 18 coupled file pairs
152
+ Generating embeddings... 623 vectors stored
153
+
154
+ Done in 4.2s — 623 symbols, 1,847 edges, 8 clusters, 34 flows
155
+ Publishing read replica... MCP will reload automatically
156
+ ```
157
+
158
+ Each analysis phase streams live progress. The final step publishes a read replica so the MCP daemon picks up the new index without restarting.
159
+
160
+ ---
161
+
162
+ ## MCP Configuration
163
+
164
+ Foreground server:
165
+
166
+ ```bash
167
+ codespine mcp
168
+ ```
169
+
170
+ Minimal `mcp.json` / Claude Desktop config:
171
+
172
+ ```json
173
+ {
174
+ "mcpServers": {
175
+ "codespine": {
176
+ "command": "codespine",
177
+ "args": ["mcp"]
178
+ }
179
+ }
180
+ }
181
+ ```
182
+
183
+ If the client launches the wrong Python environment, use the absolute binary path:
184
+
185
+ ```json
186
+ {
187
+ "mcpServers": {
188
+ "codespine": {
189
+ "command": "/absolute/path/to/codespine",
190
+ "args": ["mcp"]
191
+ }
192
+ }
193
+ }
194
+ ```
195
+
196
+ ### Agent Onboarding
197
+
198
+ When an agent connects for the first time:
199
+
200
+ 1. **`guide()`** — structured catalog of every tool, by category, with recommended workflows and tips.
201
+ 2. **`get_capabilities()`** — what is indexed right now, which features are ready, and what is missing.
202
+
203
+ The same information is available from the CLI:
204
+
205
+ ```bash
206
+ codespine guide # tool catalog, workflows, tips
207
+ codespine guide --json # structured JSON for tooling
208
+ ```
209
+
210
+ ---
211
+
212
+ ## MCP Tools (45 total)
213
+
214
+ ### Discovery & Status
215
+
216
+ | Tool | Description |
217
+ |------|-------------|
218
+ | `guide()` | Tool catalog, workflows, and tips. Call first if new to CodeSpine. |
219
+ | `get_capabilities()` | What is indexed and which features are available right now. |
220
+ | `list_projects()` | All indexed projects with symbol/file counts. |
221
+ | `get_codebase_stats()` | Per-project stats: files, classes, methods, call edges, embeddings. |
222
+ | `list_packages(project)` | Java packages in the index. |
223
+ | `ping()` | Verify the MCP server is alive. |
224
+
225
+ ### Search & Lookup
226
+
227
+ | Tool | Description |
228
+ |------|-------------|
229
+ | `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF) with `high/medium/low` confidence scores. |
230
+ | `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup; returns `primary_match` flag and disambiguated overloads. |
231
+ | `get_symbol_context(query, max_depth, project)` | One-shot deep context: search + impact + community + flows. |
232
+ | `get_neighborhood(symbol, project)` | Callers (same project), `cross_project_callers` (other projects), callees, siblings, and override/implements links. |
233
+
234
+ ### Analysis
235
+
236
+ | Tool | Description |
237
+ |------|-------------|
238
+ | `get_impact(symbol, max_depth, project)` | Caller-tree BFS including DI consumers. `self_callers` separates same-class callers from `impacted_callers`. Cached for 5 min. |
239
+ | `find_injections(symbol, project)` | All `@Inject`/`@Autowired` consumers, `@Bean`/`@Provides` providers, and `@Component`/`@Service` implementations. |
240
+ | `detect_dead_code(limit, project, strict)` | Methods with no callers (Java-aware exemptions for tests, contracts, DI entry points). Cached for 5 min. |
241
+ | `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points through the call graph. |
242
+ | `get_symbol_community(symbol)` | Architectural community cluster for a symbol. |
243
+ | `get_change_coupling(days, min_strength, min_cochanges)` | Files that changed together in git history (default last 5 days). |
244
+
245
+ ### LLM-Native Tools
246
+
247
+ Higher-level tools designed to answer full agent questions in a single call, without the agent needing to know which underlying tool to call:
248
+
249
+ | Tool | Description |
250
+ |------|-------------|
251
+ | `ask(question, project)` | Keyword-based natural language dispatcher: routes "who calls X", "what breaks if Y", "explain Z", "find methods named …" to the right tool automatically. |
252
+ | `what_breaks(symbol, project)` | Plain-English blast-radius summary with `risk_level` (low / medium / high). |
253
+ | `explain(symbol, project)` | What a class or method does and how it fits in the architecture. |
254
+ | `read_symbols(file, symbols, project)` | Extract only the requested method source ranges from a file using tree-sitter — 60–70% token reduction vs. reading the whole file. |
255
+ | `semantic_summary(symbol, project)` | Condensed class view: name, package, extends, implements, public method signatures, annotations. ~80 tokens vs. ~800. |
256
+ | `get_api_surface(class_name, project)` | Public methods and fields only. |
257
+ | `file_context(file_path, project)` | Symbols in a file, callers/callees, community, co-change partners. |
258
+ | `pre_flight_check(file, symbols, change_type)` | Blast-radius check before writing: runs `get_impact` per symbol, returns total affected + risk level + test gap. |
259
+ | `related(symbol, limit, project)` | Symbols structurally related via co-change coupling, shared community, direct calls, or class siblings. |
260
+ | `rename_plan(symbol, new_name, project)` | **Safe cross-project rename plan.** Finds all declaration sites, call sites, and override sites and returns a `files_to_modify` list. No files are modified. |
261
+ | `test_coverage(symbol, project)` | Test methods that cover the given symbol (direct or depth-2 calls from `@Test` methods). |
262
+ | `diff_impact(git_ref, project)` | Graph-level impact analysis for all Java symbols changed since `git_ref`. Returns risk level and per-file affected counts. |
263
+ | `find_pattern(description, project)` | Structural and semantic pattern matching across the codebase. |
264
+
265
+ ### Git
266
+
267
+ | Tool | Description |
268
+ |------|-------------|
269
+ | `git_log(file_path, limit, project)` | Recent git commits for a path or project. |
270
+ | `git_diff(ref, file_path, project)` | Git diff (working tree vs. ref, or between two refs). |
271
+ | `compare_branches(base_ref, head_ref, project)` | Symbol-level diff between two git refs. |
272
+
273
+ ### Indexing & Watch
274
+
275
+ | Tool | Description |
276
+ |------|-------------|
277
+ | `analyse_project(path, full, deep, embed)` | Index a Java project (background subprocess). |
278
+ | `get_analyse_status()` | Poll background analysis progress (includes last 30 log lines). |
279
+ | `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). Changes are immediately queryable. |
280
+ | `start_watch(path, install_hook)` | Watch for `.java` changes; write directly to graph. Pass `install_hook=True` to also install a post-commit git hook. |
281
+ | `stop_watch()` | Stop the background watch process. |
282
+ | `get_watch_status()` | Watch mode status: running, path, uptime. |
283
+
284
+ > **Auto-watch:** The MCP server automatically starts watching the most-recently-indexed project on startup if watch is not already running.
285
+
286
+ ### Overlay
287
+
288
+ | Tool | Description |
289
+ |------|-------------|
290
+ | `get_overlay_status(project)` | Uncommitted overlay state by project/module. |
291
+ | `promote_overlay(project)` | Commit dirty overlay into the base index. |
292
+ | `clear_overlay(project)` | Discard dirty overlay without changing the base. |
293
+
294
+ ### Reset
295
+
296
+ | Tool | Description |
297
+ |------|-------------|
298
+ | `reset_project(project_id)` | Remove all data for one project. |
299
+ | `reset_index()` | Remove ALL data across every project. |
300
+ | `force_reset_index()` | Emergency: delete data files when normal reset fails. |
301
+
302
+ ### Advanced
303
+
304
+ | Tool | Description |
305
+ |------|-------------|
306
+ | `run_cypher(query)` | Run a raw Cypher query against the graph DB. |
307
+
308
+ ---
309
+
310
+ ## CLI Reference
311
+
312
+ ```bash
313
+ # Indexing
314
+ codespine analyse <path> # incremental index (default)
315
+ codespine analyse <path> --full # full re-index from scratch
316
+ codespine analyse <path> --deep # + communities, flows, dead code, coupling
317
+ codespine analyse <path> --incremental-deep # incremental index + force deep passes
318
+ codespine analyse <path> --embed # + vector embeddings
319
+
320
+ # Live watch
321
+ codespine watch --path . # file-save-triggered direct-to-graph writes
322
+ codespine watch --path . --install-hook # also install post-commit git hook
323
+ codespine watch --path . --uninstall-hook # remove git hook
324
+
325
+ # Search & Analysis (CLI)
326
+ codespine search "query" # hybrid search
327
+ codespine context "symbol" # one-shot deep context
328
+ codespine impact "symbol" # caller-tree impact (includes DI consumers)
329
+ codespine deadcode # dead code candidates
330
+ codespine flow # execution flows
331
+ codespine community # architectural clusters
332
+ codespine coupling # git change coupling
333
+ codespine diff main..feature # symbol-level branch diff
334
+
335
+ # Status & Info
336
+ codespine stats # per-project stats (--shards for shard layout)
337
+ codespine list # indexed projects
338
+ codespine status # service and database status
339
+ codespine guide # tool catalog and workflows
340
+
341
+ # Overlay
342
+ codespine overlay-status # dirty overlay state
343
+ codespine overlay-promote # commit overlay to base
344
+ codespine overlay-clear # discard overlay
345
+
346
+ # Server Management
347
+ codespine start # launch background MCP server (daemon)
348
+ codespine stop # stop background MCP server
349
+ codespine mcp # foreground MCP (stdio, for IDE clients)
350
+
351
+ # Model & Setup
352
+ codespine install-model # download embedding model for semantic search
353
+ codespine setup # check dependencies
354
+
355
+ # Cleanup & Reset
356
+ codespine clear-project <project_id> # remove one project
357
+ codespine clear-index # remove all indexed data
358
+ codespine force-reset # emergency: delete all data files
359
+ ```
360
+
361
+ `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
362
+
363
+ Deep analysis (`--deep`) now runs automatically for repos with ≤ 3,000 files. For larger repos, pass `--deep` explicitly. Use `--incremental-deep` when you want a fast file-only update but still want communities, flows, dead code, and coupling refreshed.
364
+
365
+ ---
366
+
367
+ ## Workspace and Module Detection
368
+
369
+ CodeSpine can index:
370
+
371
+ - A single Java repo
372
+ - A multi-module Maven or Gradle project
373
+ - A workspace directory containing multiple independent repos
374
+
375
+ **Project IDs:**
376
+
377
+ | Layout | Project ID |
378
+ |--------|------------|
379
+ | Single-module repo | `payments-service` |
380
+ | Multi-module repo: core | `payments-service::core` |
381
+ | Multi-module repo: api | `payments-service::api` |
382
+
383
+ Pass the same project ID to any MCP tool or CLI command that accepts `project=` to scope results.
384
+
385
+ ---
386
+
387
+ ## DI / Injection Analysis
388
+
389
+ CodeSpine resolves dependency injection bindings at index time and stores them as first-class graph edges.
390
+
391
+ **What is indexed:**
392
+
393
+ | Annotation | Edge |
394
+ |------------|------|
395
+ | `@Inject` / `@Autowired` field | `INJECTS(consumer → provider, confidence=0.85)` |
396
+ | `@Provides` / `@Bean` method | `INJECTS(config_class → return_type, confidence=0.90)` |
397
+ | `@Component` / `@Service` impl | `BINDS_INTERFACE(impl → interface, confidence=0.95)` |
398
+
399
+ **Impact on existing tools:**
400
+
401
+ - `get_impact("PaymentService")` includes classes that inject `PaymentService`, not just direct callers.
402
+ - `detect_dead_code` skips classes that are referenced only via DI edges.
403
+
404
+ **Dedicated tool:**
405
+
406
+ ```python
407
+ find_injections("PaymentProcessor")
408
+ # → @Inject/@Autowired consumers
409
+ # → @Bean/@Provides providers
410
+ # → @Component/@Service implementations
411
+ ```
412
+
413
+ ---
414
+
415
+ ## Instant Change Visibility
416
+
417
+ CodeSpine writes file changes directly to the graph — no O(N) overlay merge on every query.
418
+
419
+ When `codespine watch` detects a file save:
420
+
421
+ 1. Parses the changed file with tree-sitter
422
+ 2. Atomically clears and re-writes that file's methods, calls, and type relationships
423
+ 3. Snapshots the write DB to the read replica
424
+ 4. The MCP server picks up the new snapshot on its next tool call
425
+
426
+ Every tool — `search_hybrid`, `get_impact`, `get_symbol_context`, `find_injections` — reflects the latest file save within the debounce window (default 1–2 s).
427
+
428
+ ### Git Commit Auto Re-index
429
+
430
+ Watch mode polls `git HEAD` every 5 s. When HEAD changes it runs `git diff --name-only` to find the modified Java files and re-indexes only those — not the full project.
431
+
432
+ Install an optional post-commit hook so re-indexing fires immediately on every commit:
433
+
434
+ ```bash
435
+ codespine watch --path . --install-hook
436
+ ```
437
+
438
+ Or from MCP:
439
+
440
+ ```python
441
+ start_watch(path=".", install_hook=True)
442
+ ```
443
+
444
+ The hook is idempotent and can be removed:
445
+
446
+ ```bash
447
+ codespine watch --uninstall-hook --path .
448
+ ```
449
+
450
+ ---
451
+
452
+ ## Sharding (Multi-Shard Storage)
453
+
454
+ For large workspaces with many independent projects, CodeSpine distributes project data across multiple on-disk database shards (DuckDB files by default, or KùzuDB directories) using a consistent hash ring.
455
+
456
+ **Default:** 4 shards stored under `~/.codespine/shards/{0,1,2,3}/db`.
457
+
458
+ **Key property — module co-location:** All modules of the same project always land on the same shard so cross-module call resolution stays local. `myapp::core` and `myapp::api` always share one shard.
459
+
460
+ **Parallel indexing:** Projects on different shards are indexed concurrently; modules on the same shard are indexed serially to avoid write contention.
461
+
462
+ **Configuration:**
463
+
464
+ ```bash
465
+ # Override shard count (applied at first use; changing later requires re-index)
466
+ export CODESPINE_SHARDS=8
467
+ codespine analyse /path/to/project
468
+ ```
469
+
470
+ **Shard topology:**
471
+
472
+ ```bash
473
+ codespine stats --shards
474
+ ```
475
+
476
+ **Programmatic access:**
477
+
478
+ ```python
479
+ from codespine.sharding.store import ShardedGraphStore
480
+
481
+ sg = ShardedGraphStore(num_shards=4)
482
+ store = sg.shard("my-project") # returns the right GraphStore shard
483
+ projects = sg.list_project_metadata() # fan-out across all shards
484
+ ```
485
+
486
+ **Migration from v0.9.x:** On first run after upgrading, `~/.codespine_db` is automatically migrated to shard 0's path (`~/.codespine/shards/0/db`). No manual steps required.
487
+
488
+ ---
489
+
490
+ ## Storage Backends
491
+
492
+ CodeSpine ships two storage backends. **DuckDB is the default** starting with v1.0.0. KùzuDB is retained as the alternate for users who need its property-graph Cypher interface.
493
+
494
+ ### DuckDB (default)
495
+
496
+ - 10–50× faster batch writes (`executemany` on flat relational tables vs. Kuzu's property-graph MERGE/UNWIND).
497
+ - Single-file database — snapshots are a plain file copy after `CHECKPOINT`.
498
+ - Standard SQL for direct inspection with any DuckDB client or notebook.
499
+ - Transparent Cypher→SQL translation: all analysis modules continue to issue Cypher queries internally; the DuckDB adapter translates them automatically.
500
+ - Bundled in `codespine`'s core dependencies — no extra install step.
501
+
502
+ ### KùzuDB (alternate)
503
+
504
+ KùzuDB is a native property-graph database queried with Cypher. Prefer it when you need the `run_cypher` MCP tool for ad-hoc traversals or when integrating with other Kuzu tooling.
505
+
506
+ **Switch to KùzuDB:**
507
+
508
+ ```bash
509
+ CODESPINE_BACKEND=kuzu codespine analyse /path/to/project
510
+ CODESPINE_BACKEND=kuzu codespine mcp
511
+ ```
512
+
513
+ **Per-instance:**
514
+
515
+ ```python
516
+ from codespine.sharding.store import ShardedGraphStore
517
+
518
+ sg = ShardedGraphStore(backend="kuzu", num_shards=4) # KùzuDB
519
+ sg = ShardedGraphStore(backend="duckdb", num_shards=4) # DuckDB (default)
520
+ ```
521
+
522
+ > **Note:** keep `CODESPINE_BACKEND` consistent between the indexer and MCP server for the same shard path — mixing backends on the same path will produce errors.
523
+
524
+ ---
525
+
526
+ ## Result Caching
527
+
528
+ Expensive analysis tools cache their results for 5 minutes. The cache is keyed by `(tool_name, arguments, snapshot_mtime)` so a new index snapshot automatically invalidates stale entries.
529
+
530
+ **Cached tools:** `get_impact`, `detect_dead_code`.
531
+
532
+ The cache is per MCP server instance (in-memory, not persisted across restarts). It is invalidated automatically when `reindex_file` or `analyse_project` completes.
533
+
534
+ **Cache stats** are visible via `get_capabilities()`.
535
+
536
+ ---
537
+
538
+ ## Deep Analysis Details
539
+
540
+ The deep analysis phase covers four passes that are expensive but optional:
541
+
542
+ | Pass | What it does | When to use |
543
+ |------|-------------|-------------|
544
+ | Communities | Detects structural clusters (Leiden algorithm) | Architectural exploration, community tools |
545
+ | Execution flows | Traces call paths from public entry points | `trace_execution_flows`, `get_symbol_context` |
546
+ | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
547
+ | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
548
+
549
+ **Auto-threshold:** deep analysis runs automatically when the project has ≤ 3,000 Java files. Larger repos get lightweight flow/dead-code passes; full deep analysis requires `--deep`.
550
+
551
+ **Incremental deep:** `--incremental-deep` combines incremental file indexing with a forced full deep pass — useful after large refactors where you want the call graph refreshed quickly but also want updated communities and coupling.
552
+
553
+ ```bash
554
+ codespine analyse . --incremental-deep
555
+ ```
556
+
557
+ **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
558
+
559
+ ---
560
+
561
+ ## Concurrent Indexing and Querying
562
+
563
+ The indexer (write) and the MCP daemon (read) use separate database paths and buffer pools:
564
+
565
+ | Path | Pool | Purpose |
566
+ |------|------|---------|
567
+ | `~/.codespine/shards/{N}/db` | 512 MB | Indexer write path |
568
+ | `~/.codespine/shards/{N}/db_read` | 128 MB | MCP + CLI read path |
569
+
570
+ When indexing completes, the write DB is atomically snapshotted to the read path and a sentinel file is touched. The MCP daemon detects the sentinel change and silently reloads from the new snapshot on the next tool call — no restart needed.
571
+
572
+ Running `codespine analyse --deep --embed` on one project while querying a different one no longer causes buffer pool OOM or lock contention.
573
+
574
+ ---
575
+
576
+ ## Runtime Files
577
+
578
+ ```
579
+ ~/.codespine/
580
+ shards/
581
+ 0/
582
+ db/ # Shard 0 write database (KùzuDB directory or DuckDB .db file)
583
+ db_read/ # Shard 0 read replica
584
+ db_read.updated # Sentinel; touched after each snapshot
585
+ 1/ … # Shards 1-3 (same layout)
586
+
587
+ ~/.codespine.pid # Background MCP server PID
588
+ ~/.codespine.log # Background server log
589
+ ~/.codespine_embedding_cache.json # Embedding cache (thread-safe JSON)
590
+ ~/.codespine_index_meta/ # Incremental file metadata (SHA hashes)
591
+ ~/.codespine_overlay/ # Legacy overlay directory (direct-to-graph is primary)
592
+
593
+ # Legacy paths (pre-0.9.7; auto-migrated to shards/0/ on first run)
594
+ ~/.codespine_db/
595
+ ~/.codespine_db_read/
596
+ ```
597
+
598
+ ---
599
+
600
+ ## Programmatic API
601
+
602
+ ```python
603
+ from codespine.sharding.store import ShardedGraphStore
604
+ from codespine.indexer.engine import JavaIndexer
605
+ from codespine.analysis.impact import analyze_impact
606
+ from codespine.search.hybrid import hybrid_search
607
+
608
+ # Open (or create) the store
609
+ sg = ShardedGraphStore()
610
+ store = sg.shard("my-project")
611
+
612
+ # Index a project
613
+ result = JavaIndexer(store).index_project("/path/to/project", full=True, project_id="my-project")
614
+ print(f"Indexed {result.files_indexed} files, {result.methods_indexed} methods")
615
+
616
+ # Snapshot so readers see the new data
617
+ store.snapshot_to_read_replica()
618
+
619
+ # Search
620
+ hits = hybrid_search(store, "payment processor", project="my-project")
621
+
622
+ # Impact analysis
623
+ impact = analyze_impact(store, "PaymentService", max_depth=4, project="my-project")
624
+ ```
625
+
626
+ ---
627
+
628
+ ## Notes
629
+
630
+ - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
631
+ - `codespine watch` writes changes directly to the graph and snapshots the read replica after each batch. MCP queries reflect file saves within the debounce window.
632
+ - `git HEAD` is polled every 5 s. On a new commit, only the changed Java files are re-indexed via `git diff --name-only` — not the full project.
633
+ - `codespine clear-index` rebuilds the local databases from scratch. This also removes the read replicas; run `analyse` again to republish.
634
+ - `codespine force-reset` is the nuclear option — it deletes all data files without going through the DB engine. Use it when `clear-index` fails due to DB corruption (e.g. after an abrupt Ctrl+C mid-write with KùzuDB).
635
+ - For large Spring or JPA-heavy repos, dead-code results should be reviewed before deletion. The tool is conservative by default; use `strict=True` for a more aggressive audit.
636
+ - The `CODESPINE_BACKEND` env var must be set consistently across the indexer and the MCP server — mixing backends on the same shard path will produce errors.
637
+
638
+ ---
639
+
640
+ ## Project Links
641
+
642
+ - [GitHub](https://github.com/vinayak3022/codeSpine)
643
+ - [Issues](https://github.com/vinayak3022/codeSpine/issues)
644
+ - [PyPI](https://pypi.org/project/codespine/)
645
+ - [Contributing](.github/CONTRIBUTING.md)
646
+ - [Security](.github/SECURITY.md)
647
+ - [Code of Conduct](.github/CODE_OF_CONDUCT.md)