codespine 0.1.8__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codespine-0.3.0/PKG-INFO +333 -0
- codespine-0.3.0/README.md +269 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/__init__.py +1 -1
- {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/context.py +4 -4
- {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/deadcode.py +35 -17
- codespine-0.3.0/codespine/analysis/flow.py +111 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/impact.py +14 -7
- {codespine-0.1.8 → codespine-0.3.0}/codespine/cli.py +210 -34
- {codespine-0.1.8 → codespine-0.3.0}/codespine/config.py +1 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/db/store.py +29 -10
- {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/engine.py +288 -41
- codespine-0.3.0/codespine/mcp/server.py +917 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/search/hybrid.py +10 -4
- {codespine-0.1.8 → codespine-0.3.0}/codespine/search/vector.py +2 -2
- codespine-0.3.0/codespine/watch/watcher.py +75 -0
- codespine-0.3.0/codespine.egg-info/PKG-INFO +333 -0
- {codespine-0.1.8 → codespine-0.3.0}/pyproject.toml +1 -1
- codespine-0.3.0/tests/test_index_and_hybrid.py +36 -0
- codespine-0.1.8/PKG-INFO +0 -150
- codespine-0.1.8/README.md +0 -86
- codespine-0.1.8/codespine/analysis/flow.py +0 -77
- codespine-0.1.8/codespine/mcp/server.py +0 -67
- codespine-0.1.8/codespine/watch/watcher.py +0 -38
- codespine-0.1.8/codespine.egg-info/PKG-INFO +0 -150
- codespine-0.1.8/tests/test_index_and_hybrid.py +0 -21
- {codespine-0.1.8 → codespine-0.3.0}/LICENSE +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/community.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/db/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/db/schema.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/search/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/search/bm25.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/search/rrf.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/gindex.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/setup.cfg +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/tests/test_call_resolver.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/tests/test_java_parser.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/tests/test_multimodule_index.py +0 -0
- {codespine-0.1.8 → codespine-0.3.0}/tests/test_search_ranking.py +0 -0
codespine-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codespine
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
|
+
Author: CodeSpine contributors
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 CodeSpine contributors
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/vinayak3022/codeSpine
|
|
29
|
+
Project-URL: Repository, https://github.com/vinayak3022/codeSpine
|
|
30
|
+
Project-URL: Issues, https://github.com/vinayak3022/codeSpine/issues
|
|
31
|
+
Keywords: java,code-indexing,graph,kuzu,mcp
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
40
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
41
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: click
|
|
46
|
+
Requires-Dist: kuzu
|
|
47
|
+
Requires-Dist: tree-sitter
|
|
48
|
+
Requires-Dist: tree-sitter-java
|
|
49
|
+
Requires-Dist: fastmcp
|
|
50
|
+
Requires-Dist: psutil
|
|
51
|
+
Requires-Dist: watchfiles
|
|
52
|
+
Provides-Extra: ml
|
|
53
|
+
Requires-Dist: sentence-transformers; extra == "ml"
|
|
54
|
+
Requires-Dist: numpy; extra == "ml"
|
|
55
|
+
Provides-Extra: community
|
|
56
|
+
Requires-Dist: igraph; extra == "community"
|
|
57
|
+
Requires-Dist: leidenalg; extra == "community"
|
|
58
|
+
Provides-Extra: full
|
|
59
|
+
Requires-Dist: sentence-transformers; extra == "full"
|
|
60
|
+
Requires-Dist: numpy; extra == "full"
|
|
61
|
+
Requires-Dist: igraph; extra == "full"
|
|
62
|
+
Requires-Dist: leidenalg; extra == "full"
|
|
63
|
+
Dynamic: license-file
|
|
64
|
+
|
|
65
|
+
# CodeSpine
|
|
66
|
+
|
|
67
|
+
**A code-intelligence layer for Java codebases — purpose-built for AI agents.**
|
|
68
|
+
|
|
69
|
+
Instead of making your agent read hundreds of raw source files, CodeSpine maps your entire codebase into a live graph and exposes it through 24 structured MCP tools.
|
|
70
|
+
Your agent asks a question, gets a precise answer — no file trawling, no wasted tokens, no hallucinated call chains.
|
|
71
|
+
|
|
72
|
+
> **Token efficiency in practice**: a `get_symbol_context` call returns a fully resolved call graph for a symbol in one round-trip.
|
|
73
|
+
> The equivalent "read every relevant file" approach typically costs 10-50× more tokens and still misses transitive edges.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## How it works
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
Your Java codebase
|
|
81
|
+
│
|
|
82
|
+
codespine analyse ← one-time (or on-demand) indexing
|
|
83
|
+
│
|
|
84
|
+
~/.codespine_db ← Kuzu graph DB (symbols, calls, communities, flows …)
|
|
85
|
+
│
|
|
86
|
+
codespine mcp ← FastMCP server — 24 tools
|
|
87
|
+
│
|
|
88
|
+
Your AI agent (Claude, GPT, Cursor, Cline …)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Agents talk to the MCP server. They never need to open a `.java` file unless they are actually editing it.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Installation
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pip install codespine
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Optional: install `sentence-transformers` to enable semantic vector search (adds ~500 MB of model weights).
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install sentence-transformers
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Quick Start
|
|
110
|
+
|
|
111
|
+
### 1 — Index your codebase
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
# Fast (BM25 + fuzzy search, no embeddings — recommended first run)
|
|
115
|
+
codespine analyse /path/to/your/project
|
|
116
|
+
|
|
117
|
+
# With embeddings (adds semantic vector search, takes longer; unrelated to the --full re-index flag)
|
|
118
|
+
codespine analyse /path/to/your/project --embed
|
|
119
|
+
|
|
120
|
+
# Deep (+ dead code, execution flows, communities, git coupling)
|
|
121
|
+
codespine analyse /path/to/your/project --deep
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Example output:
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
$ codespine analyse .
|
|
128
|
+
Walking files... 142 files found
|
|
129
|
+
Parsing code... 142/142 (parallel, 4 workers)
|
|
130
|
+
Tracing calls... 847 calls resolved
|
|
131
|
+
Analyzing types... 234 type relationships
|
|
132
|
+
Detecting communities... 8 clusters found
|
|
133
|
+
Detecting execution flows... 34 processes found
|
|
134
|
+
Finding dead code... 12 unreachable symbols
|
|
135
|
+
Analyzing git history... 18 coupled file pairs
|
|
136
|
+
|
|
137
|
+
Done in 18s — 623 symbols, 1,847 edges, 8 clusters, 34 flows
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### 2 — Wire up MCP
|
|
141
|
+
|
|
142
|
+
Add to your MCP config (`~/.claude/mcp.json` or equivalent):
|
|
143
|
+
|
|
144
|
+
```json
|
|
145
|
+
{
|
|
146
|
+
"mcpServers": {
|
|
147
|
+
"codespine": {
|
|
148
|
+
"command": "codespine",
|
|
149
|
+
"args": ["mcp"]
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### 3 — Let the agent explore
|
|
156
|
+
|
|
157
|
+
The agent can now call tools like:
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
search_hybrid("payment retry logic")
|
|
161
|
+
get_symbol_context("processPayment")
|
|
162
|
+
get_impact("com.example.PaymentService#charge")
|
|
163
|
+
detect_dead_code()
|
|
164
|
+
get_codebase_stats()
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## MCP Tools (24)
|
|
170
|
+
|
|
171
|
+
### Connectivity & Discovery
|
|
172
|
+
|
|
173
|
+
| Tool | What it does |
|
|
174
|
+
|------|-------------|
|
|
175
|
+
| `ping()` | Confirm the server is alive. Call this first. |
|
|
176
|
+
| `get_capabilities()` | Returns what is indexed right now — projects, symbol counts, which features are available, and whether watch mode is running. Call before other tools to avoid trial-and-error. |
|
|
177
|
+
| `list_projects()` | List every indexed project with path, symbol count, and file count. |
|
|
178
|
+
| `get_codebase_stats()` | Per-project breakdown: files, classes, methods, calls, embeddings, totals. |
|
|
179
|
+
|
|
180
|
+
### Search
|
|
181
|
+
|
|
182
|
+
| Tool | What it does |
|
|
183
|
+
|------|-------------|
|
|
184
|
+
| `search_hybrid(query, k, project)` | BM25 + semantic vector + fuzzy, fused with RRF. Scope to a project with `project=`. |
|
|
185
|
+
| `find_symbol(name, kind, project, limit)` | Exact / prefix name lookup returning **all** matches grouped by project. Use this when the same class name exists in multiple projects to pick the right one. |
|
|
186
|
+
| `list_packages(project, limit)` | All Java packages with class count, grouped by project. Good for structural orientation before searching. |
|
|
187
|
+
|
|
188
|
+
### Analysis
|
|
189
|
+
|
|
190
|
+
| Tool | What it does |
|
|
191
|
+
|------|-------------|
|
|
192
|
+
| `get_symbol_context(query, max_depth, project)` | Full call graph context for a symbol — callers, callees, types, up to `max_depth` hops. |
|
|
193
|
+
| `get_impact(symbol, max_depth, project)` | Depth-grouped impact analysis with confidence scores. Shows what breaks if this symbol changes. |
|
|
194
|
+
| `detect_dead_code(limit, project)` | Unreachable symbols after applying framework exemptions (Spring, JPA, …). |
|
|
195
|
+
| `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points (or a specific symbol). |
|
|
196
|
+
| `get_symbol_community(symbol)` | Which community cluster a symbol belongs to, with co-members. |
|
|
197
|
+
| `get_change_coupling(months, min_strength, min_cochanges, project)` | Git-derived file pairs that change together — useful for predicting collateral changes. |
|
|
198
|
+
|
|
199
|
+
### Git
|
|
200
|
+
|
|
201
|
+
| Tool | What it does |
|
|
202
|
+
|------|-------------|
|
|
203
|
+
| `git_log(file_path, limit, project)` | Commit history for a file or the whole repo. |
|
|
204
|
+
| `git_diff(ref, file_path, project)` | Diff against a ref (default `HEAD`). |
|
|
205
|
+
| `compare_branches(base_ref, head_ref)` | Symbol-level diff between two branches — which classes/methods changed. |
|
|
206
|
+
|
|
207
|
+
### Watch Mode (live incremental reindex)
|
|
208
|
+
|
|
209
|
+
| Tool | What it does |
|
|
210
|
+
|------|-------------|
|
|
211
|
+
| `start_watch(path, global_interval)` | Start incremental reindexing in the background. Watches for file changes and updates the graph within `global_interval` seconds. **Recommended**: keep this running during active development sessions so the graph stays fresh. |
|
|
212
|
+
| `stop_watch()` | Gracefully stop the background watcher. |
|
|
213
|
+
| `get_watch_status()` | Check if watch is running — uptime, path, interval. |
|
|
214
|
+
|
|
215
|
+
### On-demand Analysis (non-blocking)
|
|
216
|
+
|
|
217
|
+
| Tool | What it does |
|
|
218
|
+
|------|-------------|
|
|
219
|
+
| `analyse_project(path, full, deep, embed)` | Trigger a full re-analysis as a background job. Returns immediately. Poll `get_analyse_status()` for progress. |
|
|
220
|
+
| `get_analyse_status()` | Check background analysis — running / done / failed, last log lines. |
|
|
221
|
+
|
|
222
|
+
### Index Management
|
|
223
|
+
|
|
224
|
+
| Tool | What it does |
|
|
225
|
+
|------|-------------|
|
|
226
|
+
| `reset_project(project_id)` | Delete all graph data for one project (clean-slate re-index). |
|
|
227
|
+
| `reset_index()` | Wipe the entire index — all projects, communities, flows. |
|
|
228
|
+
|
|
229
|
+
### Power / Debug
|
|
230
|
+
|
|
231
|
+
| Tool | What it does |
|
|
232
|
+
|------|-------------|
|
|
233
|
+
| `run_cypher(query)` | Execute a raw Cypher read query against the graph (Kuzu dialect). For advanced exploration. |
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## CLI Reference
|
|
238
|
+
|
|
239
|
+
### Indexing
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
codespine analyse <path> # fast index (no embeddings)
|
|
243
|
+
codespine analyse <path> --embed # + semantic vectors
|
|
244
|
+
codespine analyse <path> --full # force full re-index (skip incremental)
|
|
245
|
+
codespine analyse <path> --deep # + dead code, flows, communities, git coupling
|
|
246
|
+
codespine analyse <path> --deep --embed # everything
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Search & Analysis
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
codespine search "payment retry bug" [--k 20] [--json]
|
|
253
|
+
codespine context "processPayment" [--max-depth 3] [--json]
|
|
254
|
+
codespine impact "com.example.Service#processPayment(java.lang.String)" [--max-depth 4] [--json]
|
|
255
|
+
codespine deadcode [--limit 200] [--json]
|
|
256
|
+
codespine flow [--entry <symbol>] [--max-depth 6] [--json]
|
|
257
|
+
codespine community [--symbol <symbol>] [--json]
|
|
258
|
+
codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]
|
|
259
|
+
codespine diff <base>..<head> [--json]
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Stats
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
codespine stats # per-project table: files, classes, methods, calls, embeddings
|
|
266
|
+
codespine stats --json # machine-readable output
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
### Watch
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
codespine watch [--path .] [--global-interval 30]
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### Index Management
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
codespine clear-project <project_id> # remove one project from the graph
|
|
279
|
+
codespine clear-index # wipe the entire index
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## Workspace / Multi-Project Support
|
|
285
|
+
|
|
286
|
+
CodeSpine understands three levels of hierarchy:
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
~/IdeaProjects/ ← workspace (a folder of independent projects)
|
|
290
|
+
├── payments-service/ ← project (has its own .git / pom.xml)
|
|
291
|
+
│ ├── core/ ← module (Maven <module> or Gradle subproject)
|
|
292
|
+
│ └── api/ ← module
|
|
293
|
+
└── inventory-service/ ← project
|
|
294
|
+
└── (single-module)
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
- **Workspace detection**: if the path you give to `analyse` has no `.git` or build file at its root, CodeSpine scans one level down for sub-projects and indexes them all.
|
|
298
|
+
- **Project IDs**: single-module → `payments-service`; multi-module → `payments-service::core`, `payments-service::api`.
|
|
299
|
+
- **Scoped queries**: most analysis and search tools accept an optional `project=` parameter (see the tool signatures above) so agents can work within one project without noise from others.
|
|
300
|
+
- **Cross-project search**: omit `project=` to search across everything.
|
|
301
|
+
|
|
302
|
+
---
|
|
303
|
+
|
|
304
|
+
## Embedding / Speed Trade-off
|
|
305
|
+
|
|
306
|
+
| Flag | Index time | Search modes available |
|
|
307
|
+
|------|-----------|----------------------|
|
|
308
|
+
| *(no flag)* | Fast (~seconds–minutes) | BM25, fuzzy, exact |
|
|
309
|
+
| `--embed` | Slower (minutes, depends on model) | BM25, fuzzy, exact + **semantic vector** |
|
|
310
|
+
|
|
311
|
+
`sentence-transformers` must be installed for `--embed` to have any effect.
|
|
312
|
+
If it is not installed, `--embed` is silently ignored and no embeddings are generated.
|
|
313
|
+
|
|
314
|
+
Most agent workflows work great without embeddings — BM25 + fuzzy covers keyword, partial-name, and typo-tolerant search. Add `--embed` when you need concept-level similarity ("find all classes related to retry logic").
|
|
315
|
+
|
|
316
|
+
---
|
|
317
|
+
|
|
318
|
+
## Runtime Paths
|
|
319
|
+
|
|
320
|
+
| Path | Purpose |
|
|
321
|
+
|------|---------|
|
|
322
|
+
| `~/.codespine_db` | Kuzu graph database |
|
|
323
|
+
| `~/.codespine.pid` | Watch-mode PID file |
|
|
324
|
+
| `~/.codespine.log` | Watch-mode log |
|
|
325
|
+
| `~/.codespine_embedding_cache.sqlite3` | Embedding vector cache |
|
|
326
|
+
|
|
327
|
+
---
|
|
328
|
+
|
|
329
|
+
## Project Docs
|
|
330
|
+
|
|
331
|
+
- [Contributing](.github/CONTRIBUTING.md)
|
|
332
|
+
- [Security](.github/SECURITY.md)
|
|
333
|
+
- [Code of Conduct](.github/CODE_OF_CONDUCT.md)
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# CodeSpine
|
|
2
|
+
|
|
3
|
+
**A code-intelligence layer for Java codebases — purpose-built for AI agents.**
|
|
4
|
+
|
|
5
|
+
Instead of making your agent read hundreds of raw source files, CodeSpine maps your entire codebase into a live graph and exposes it through 24 structured MCP tools.
|
|
6
|
+
Your agent asks a question, gets a precise answer — no file trawling, no wasted tokens, no hallucinated call chains.
|
|
7
|
+
|
|
8
|
+
> **Token efficiency in practice**: a `get_symbol_context` call returns a fully resolved call graph for a symbol in one round-trip.
|
|
9
|
+
> The equivalent "read every relevant file" approach typically costs 10-50× more tokens and still misses transitive edges.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## How it works
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
Your Java codebase
|
|
17
|
+
│
|
|
18
|
+
codespine analyse ← one-time (or on-demand) indexing
|
|
19
|
+
│
|
|
20
|
+
~/.codespine_db ← Kuzu graph DB (symbols, calls, communities, flows …)
|
|
21
|
+
│
|
|
22
|
+
codespine mcp ← FastMCP server — 24 tools
|
|
23
|
+
│
|
|
24
|
+
Your AI agent (Claude, GPT, Cursor, Cline …)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Agents talk to the MCP server. They never need to open a `.java` file unless they are actually editing it.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install codespine
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Optional: install `sentence-transformers` to enable semantic vector search (adds ~500 MB of model weights).
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install sentence-transformers
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Quick Start
|
|
46
|
+
|
|
47
|
+
### 1 — Index your codebase
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Fast (BM25 + fuzzy search, no embeddings — recommended first run)
|
|
51
|
+
codespine analyse /path/to/your/project
|
|
52
|
+
|
|
53
|
+
# With embeddings (adds semantic vector search, takes longer; unrelated to the --full re-index flag)
|
|
54
|
+
codespine analyse /path/to/your/project --embed
|
|
55
|
+
|
|
56
|
+
# Deep (+ dead code, execution flows, communities, git coupling)
|
|
57
|
+
codespine analyse /path/to/your/project --deep
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Example output:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
$ codespine analyse .
|
|
64
|
+
Walking files... 142 files found
|
|
65
|
+
Parsing code... 142/142 (parallel, 4 workers)
|
|
66
|
+
Tracing calls... 847 calls resolved
|
|
67
|
+
Analyzing types... 234 type relationships
|
|
68
|
+
Detecting communities... 8 clusters found
|
|
69
|
+
Detecting execution flows... 34 processes found
|
|
70
|
+
Finding dead code... 12 unreachable symbols
|
|
71
|
+
Analyzing git history... 18 coupled file pairs
|
|
72
|
+
|
|
73
|
+
Done in 18s — 623 symbols, 1,847 edges, 8 clusters, 34 flows
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 2 — Wire up MCP
|
|
77
|
+
|
|
78
|
+
Add to your MCP config (`~/.claude/mcp.json` or equivalent):
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"mcpServers": {
|
|
83
|
+
"codespine": {
|
|
84
|
+
"command": "codespine",
|
|
85
|
+
"args": ["mcp"]
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### 3 — Let the agent explore
|
|
92
|
+
|
|
93
|
+
The agent can now call tools like:
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
search_hybrid("payment retry logic")
|
|
97
|
+
get_symbol_context("processPayment")
|
|
98
|
+
get_impact("com.example.PaymentService#charge")
|
|
99
|
+
detect_dead_code()
|
|
100
|
+
get_codebase_stats()
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## MCP Tools (24)
|
|
106
|
+
|
|
107
|
+
### Connectivity & Discovery
|
|
108
|
+
|
|
109
|
+
| Tool | What it does |
|
|
110
|
+
|------|-------------|
|
|
111
|
+
| `ping()` | Confirm the server is alive. Call this first. |
|
|
112
|
+
| `get_capabilities()` | Returns what is indexed right now — projects, symbol counts, which features are available, and whether watch mode is running. Call before other tools to avoid trial-and-error. |
|
|
113
|
+
| `list_projects()` | List every indexed project with path, symbol count, and file count. |
|
|
114
|
+
| `get_codebase_stats()` | Per-project breakdown: files, classes, methods, calls, embeddings, totals. |
|
|
115
|
+
|
|
116
|
+
### Search
|
|
117
|
+
|
|
118
|
+
| Tool | What it does |
|
|
119
|
+
|------|-------------|
|
|
120
|
+
| `search_hybrid(query, k, project)` | BM25 + semantic vector + fuzzy, fused with RRF. Scope to a project with `project=`. |
|
|
121
|
+
| `find_symbol(name, kind, project, limit)` | Exact / prefix name lookup returning **all** matches grouped by project. Use this when the same class name exists in multiple projects to pick the right one. |
|
|
122
|
+
| `list_packages(project, limit)` | All Java packages with class count, grouped by project. Good for structural orientation before searching. |
|
|
123
|
+
|
|
124
|
+
### Analysis
|
|
125
|
+
|
|
126
|
+
| Tool | What it does |
|
|
127
|
+
|------|-------------|
|
|
128
|
+
| `get_symbol_context(query, max_depth, project)` | Full call graph context for a symbol — callers, callees, types, up to `max_depth` hops. |
|
|
129
|
+
| `get_impact(symbol, max_depth, project)` | Depth-grouped impact analysis with confidence scores. Shows what breaks if this symbol changes. |
|
|
130
|
+
| `detect_dead_code(limit, project)` | Unreachable symbols after applying framework exemptions (Spring, JPA, …). |
|
|
131
|
+
| `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points (or a specific symbol). |
|
|
132
|
+
| `get_symbol_community(symbol)` | Which community cluster a symbol belongs to, with co-members. |
|
|
133
|
+
| `get_change_coupling(months, min_strength, min_cochanges, project)` | Git-derived file pairs that change together — useful for predicting collateral changes. |
|
|
134
|
+
|
|
135
|
+
### Git
|
|
136
|
+
|
|
137
|
+
| Tool | What it does |
|
|
138
|
+
|------|-------------|
|
|
139
|
+
| `git_log(file_path, limit, project)` | Commit history for a file or the whole repo. |
|
|
140
|
+
| `git_diff(ref, file_path, project)` | Diff against a ref (default `HEAD`). |
|
|
141
|
+
| `compare_branches(base_ref, head_ref)` | Symbol-level diff between two branches — which classes/methods changed. |
|
|
142
|
+
|
|
143
|
+
### Watch Mode (live incremental reindex)
|
|
144
|
+
|
|
145
|
+
| Tool | What it does |
|
|
146
|
+
|------|-------------|
|
|
147
|
+
| `start_watch(path, global_interval)` | Start incremental reindexing in the background. Watches for file changes and updates the graph within `global_interval` seconds. **Recommended**: keep this running during active development sessions so the graph stays fresh. |
|
|
148
|
+
| `stop_watch()` | Gracefully stop the background watcher. |
|
|
149
|
+
| `get_watch_status()` | Check if watch is running — uptime, path, interval. |
|
|
150
|
+
|
|
151
|
+
### On-demand Analysis (non-blocking)
|
|
152
|
+
|
|
153
|
+
| Tool | What it does |
|
|
154
|
+
|------|-------------|
|
|
155
|
+
| `analyse_project(path, full, deep, embed)` | Trigger a full re-analysis as a background job. Returns immediately. Poll `get_analyse_status()` for progress. |
|
|
156
|
+
| `get_analyse_status()` | Check background analysis — running / done / failed, last log lines. |
|
|
157
|
+
|
|
158
|
+
### Index Management
|
|
159
|
+
|
|
160
|
+
| Tool | What it does |
|
|
161
|
+
|------|-------------|
|
|
162
|
+
| `reset_project(project_id)` | Delete all graph data for one project (clean-slate re-index). |
|
|
163
|
+
| `reset_index()` | Wipe the entire index — all projects, communities, flows. |
|
|
164
|
+
|
|
165
|
+
### Power / Debug
|
|
166
|
+
|
|
167
|
+
| Tool | What it does |
|
|
168
|
+
|------|-------------|
|
|
169
|
+
| `run_cypher(query)` | Execute a raw Cypher read query against the graph (Kuzu dialect). For advanced exploration. |
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## CLI Reference
|
|
174
|
+
|
|
175
|
+
### Indexing
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
codespine analyse <path> # fast index (no embeddings)
|
|
179
|
+
codespine analyse <path> --embed # + semantic vectors
|
|
180
|
+
codespine analyse <path> --full # force full re-index (skip incremental)
|
|
181
|
+
codespine analyse <path> --deep # + dead code, flows, communities, git coupling
|
|
182
|
+
codespine analyse <path> --deep --embed # everything
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Search & Analysis
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
codespine search "payment retry bug" [--k 20] [--json]
|
|
189
|
+
codespine context "processPayment" [--max-depth 3] [--json]
|
|
190
|
+
codespine impact "com.example.Service#processPayment(java.lang.String)" [--max-depth 4] [--json]
|
|
191
|
+
codespine deadcode [--limit 200] [--json]
|
|
192
|
+
codespine flow [--entry <symbol>] [--max-depth 6] [--json]
|
|
193
|
+
codespine community [--symbol <symbol>] [--json]
|
|
194
|
+
codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]
|
|
195
|
+
codespine diff <base>..<head> [--json]
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Stats
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
codespine stats # per-project table: files, classes, methods, calls, embeddings
|
|
202
|
+
codespine stats --json # machine-readable output
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Watch
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
codespine watch [--path .] [--global-interval 30]
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Index Management
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
codespine clear-project <project_id> # remove one project from the graph
|
|
215
|
+
codespine clear-index # wipe the entire index
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Workspace / Multi-Project Support
|
|
221
|
+
|
|
222
|
+
CodeSpine understands three levels of hierarchy:
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
~/IdeaProjects/ ← workspace (a folder of independent projects)
|
|
226
|
+
├── payments-service/ ← project (has its own .git / pom.xml)
|
|
227
|
+
│ ├── core/ ← module (Maven <module> or Gradle subproject)
|
|
228
|
+
│ └── api/ ← module
|
|
229
|
+
└── inventory-service/ ← project
|
|
230
|
+
└── (single-module)
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
- **Workspace detection**: if the path you give to `analyse` has no `.git` or build file at its root, CodeSpine scans one level down for sub-projects and indexes them all.
|
|
234
|
+
- **Project IDs**: single-module → `payments-service`; multi-module → `payments-service::core`, `payments-service::api`.
|
|
235
|
+
- **Scoped queries**: most analysis and search tools accept an optional `project=` parameter (see the tool signatures above) so agents can work within one project without noise from others.
|
|
236
|
+
- **Cross-project search**: omit `project=` to search across everything.
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Embedding / Speed Trade-off
|
|
241
|
+
|
|
242
|
+
| Flag | Index time | Search modes available |
|
|
243
|
+
|------|-----------|----------------------|
|
|
244
|
+
| *(no flag)* | Fast (~seconds–minutes) | BM25, fuzzy, exact |
|
|
245
|
+
| `--embed` | Slower (minutes, depends on model) | BM25, fuzzy, exact + **semantic vector** |
|
|
246
|
+
|
|
247
|
+
`sentence-transformers` must be installed for `--embed` to have any effect.
|
|
248
|
+
If it is not installed, `--embed` is silently ignored and no embeddings are generated.
|
|
249
|
+
|
|
250
|
+
Most agent workflows work great without embeddings — BM25 + fuzzy covers keyword, partial-name, and typo-tolerant search. Add `--embed` when you need concept-level similarity ("find all classes related to retry logic").
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## Runtime Paths
|
|
255
|
+
|
|
256
|
+
| Path | Purpose |
|
|
257
|
+
|------|---------|
|
|
258
|
+
| `~/.codespine_db` | Kuzu graph database |
|
|
259
|
+
| `~/.codespine.pid` | Watch-mode PID file |
|
|
260
|
+
| `~/.codespine.log` | Watch-mode log |
|
|
261
|
+
| `~/.codespine_embedding_cache.sqlite3` | Embedding vector cache |
|
|
262
|
+
|
|
263
|
+
---
|
|
264
|
+
|
|
265
|
+
## Project Docs
|
|
266
|
+
|
|
267
|
+
- [Contributing](.github/CONTRIBUTING.md)
|
|
268
|
+
- [Security](.github/SECURITY.md)
|
|
269
|
+
- [Code of Conduct](.github/CODE_OF_CONDUCT.md)
|
|
@@ -6,13 +6,13 @@ from codespine.analysis.impact import analyze_impact
|
|
|
6
6
|
from codespine.search.hybrid import hybrid_search
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
def build_symbol_context(store, query: str, max_depth: int = 3) -> dict:
|
|
10
|
-
search_results = hybrid_search(store, query, k=10)
|
|
9
|
+
def build_symbol_context(store, query: str, max_depth: int = 3, project: str | None = None) -> dict:
|
|
10
|
+
search_results = hybrid_search(store, query, k=10, project=project)
|
|
11
11
|
focus = search_results[0] if search_results else None
|
|
12
12
|
|
|
13
|
-
impact = analyze_impact(store, query, max_depth=max_depth)
|
|
13
|
+
impact = analyze_impact(store, query, max_depth=max_depth, project=project)
|
|
14
14
|
community = symbol_community(store, query)
|
|
15
|
-
flows = trace_execution_flows(store, entry_symbol=query, max_depth=max_depth + 2)
|
|
15
|
+
flows = trace_execution_flows(store, entry_symbol=query, max_depth=max_depth + 2, project=project)
|
|
16
16
|
|
|
17
17
|
return {
|
|
18
18
|
"query": query,
|
|
@@ -28,24 +28,42 @@ def _modifier_tokens(modifiers) -> set[str]:
|
|
|
28
28
|
return {str(m).strip() for m in modifiers}
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def detect_dead_code(store, limit: int = 200) -> list[dict]:
|
|
31
|
+
def detect_dead_code(store, limit: int = 200, project: str | None = None) -> list[dict]:
|
|
32
32
|
"""Java-aware dead code detection with exemption passes."""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
33
|
+
if project:
|
|
34
|
+
candidates = store.query_records(
|
|
35
|
+
"""
|
|
36
|
+
MATCH (m:Method), (c:Class), (f:File)
|
|
37
|
+
WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $proj
|
|
38
|
+
AND NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }
|
|
39
|
+
RETURN m.id as method_id,
|
|
40
|
+
m.name as name,
|
|
41
|
+
m.signature as signature,
|
|
42
|
+
m.modifiers as modifiers,
|
|
43
|
+
c.fqcn as class_fqcn,
|
|
44
|
+
m.is_constructor as is_constructor,
|
|
45
|
+
m.is_test as is_test
|
|
46
|
+
LIMIT $limit
|
|
47
|
+
""",
|
|
48
|
+
{"limit": int(limit * 3), "proj": project},
|
|
49
|
+
)
|
|
50
|
+
else:
|
|
51
|
+
candidates = store.query_records(
|
|
52
|
+
"""
|
|
53
|
+
MATCH (m:Method), (c:Class)
|
|
54
|
+
WHERE m.class_id = c.id
|
|
55
|
+
AND NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }
|
|
56
|
+
RETURN m.id as method_id,
|
|
57
|
+
m.name as name,
|
|
58
|
+
m.signature as signature,
|
|
59
|
+
m.modifiers as modifiers,
|
|
60
|
+
c.fqcn as class_fqcn,
|
|
61
|
+
m.is_constructor as is_constructor,
|
|
62
|
+
m.is_test as is_test
|
|
63
|
+
LIMIT $limit
|
|
64
|
+
""",
|
|
65
|
+
{"limit": int(limit * 3)},
|
|
66
|
+
)
|
|
49
67
|
|
|
50
68
|
if not candidates:
|
|
51
69
|
return []
|